From a20c93e3160e37ecccc738d8eef085c8507949ed Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Apr 2014 15:07:21 +0200 Subject: nfs: remove ->write_pageio_init from rpc ops The write_pageio_init method is just a very convoluted way to grab the right nfs_pageio_ops vector. The vector to chose is not a choice of protocol version, but just a pNFS vs MDS I/O choice that can simply be done inside nfs_pageio_init_write based on the presence of a layout driver, and a new force_mds flag to the special case of falling back to MDS I/O on a pNFS-capable volume. Signed-off-by: Christoph Hellwig Tested-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index cb53d450ae32..9edac9f01c2a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1447,20 +1447,6 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0); } -void -pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, - int ioflags, - const struct nfs_pgio_completion_ops *compl_ops) -{ - struct nfs_server *server = NFS_SERVER(inode); - struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; - - if (ld == NULL) - nfs_pageio_init_write(pgio, inode, ioflags, compl_ops); - else - nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags); -} - bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) @@ -1496,7 +1482,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode, LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops); + nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops); pgio.pg_dreq = dreq; while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); -- cgit v1.2.3 From fab5fc25d230edcc8ee72367e505955a2fae0cac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Apr 2014 15:07:22 +0200 Subject: nfs: remove ->read_pageio_init from rpc ops The read_pageio_init method is just a very convoluted way to grab the right nfs_pageio_ops vector. The vector to chose is not a choice of protocol version, but just a pNFS vs MDS I/O choice that can simply be done inside nfs_pageio_init_read based on the presence of a layout driver, and a new force_mds flag to the special case of falling back to MDS I/O on a pNFS-capable volume. Signed-off-by: Christoph Hellwig Tested-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 9edac9f01c2a..3d5bc2baafd1 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1434,19 +1434,6 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); -void -pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, - const struct nfs_pgio_completion_ops *compl_ops) -{ - struct nfs_server *server = NFS_SERVER(inode); - struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; - - if (ld == NULL) - nfs_pageio_init_read(pgio, inode, compl_ops); - else - nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0); -} - bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) @@ -1641,7 +1628,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_read(&pgio, inode, compl_ops); + nfs_pageio_init_read(&pgio, inode, true, compl_ops); pgio.pg_dreq = dreq; while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); -- cgit v1.2.3 From 9c7e1b3d50b56b8d8f6237ed232350b7c6476cd5 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:26 -0400 Subject: NFS: Create a common read and write data struct At this point, the only difference between nfs_read_data and nfs_write_data is the write verifier. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3d5bc2baafd1..e9cea3ab7cf9 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1492,7 +1492,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode, } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); -static void pnfs_ld_handle_write_error(struct nfs_write_data *data) +static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1511,7 +1511,7 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data) /* * Called by non rpc-based layout drivers */ -void pnfs_ld_write_done(struct nfs_write_data *data) +void pnfs_ld_write_done(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1527,7 +1527,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_write_done); static void pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, - struct nfs_write_data *data) + struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1540,7 +1540,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, } static enum pnfs_try_status -pnfs_try_to_write_data(struct nfs_write_data *wdata, +pnfs_try_to_write_data(struct nfs_pgio_data *wdata, const struct rpc_call_ops *call_ops, struct pnfs_layout_segment *lseg, int how) @@ -1564,7 +1564,7 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, static void pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how) { - struct nfs_write_data *data; + struct nfs_pgio_data *data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; @@ -1572,7 +1572,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he while (!list_empty(head)) { enum pnfs_try_status trypnfs; - data = list_first_entry(head, struct nfs_write_data, list); + data = list_first_entry(head, struct nfs_pgio_data, list); list_del_init(&data->list); trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); @@ -1647,7 +1647,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, } EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); -static void pnfs_ld_handle_read_error(struct nfs_read_data *data) +static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1666,7 +1666,7 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) /* * Called by non rpc-based layout drivers */ -void pnfs_ld_read_done(struct nfs_read_data *data) +void pnfs_ld_read_done(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1682,7 +1682,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_read_done); static void pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, - struct nfs_read_data *data) + struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1698,7 +1698,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, * Call the appropriate parallel I/O subsystem read function. */ static enum pnfs_try_status -pnfs_try_to_read_data(struct nfs_read_data *rdata, +pnfs_try_to_read_data(struct nfs_pgio_data *rdata, const struct rpc_call_ops *call_ops, struct pnfs_layout_segment *lseg) { @@ -1722,7 +1722,7 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, static void pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) { - struct nfs_read_data *data; + struct nfs_pgio_data *data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; @@ -1730,7 +1730,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea while (!list_empty(head)) { enum pnfs_try_status trypnfs; - data = list_first_entry(head, struct nfs_read_data, list); + data = list_first_entry(head, struct nfs_pgio_data, list); list_del_init(&data->list); trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); @@ -1821,7 +1821,7 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); void -pnfs_set_layoutcommit(struct nfs_write_data *wdata) +pnfs_set_layoutcommit(struct nfs_pgio_data *wdata) { struct nfs_pgio_header *hdr = wdata->header; struct inode *inode = hdr->inode; -- cgit v1.2.3 From c0752cdfbbb691cfe98812f7aed8ce1e766823c4 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:27 -0400 Subject: NFS: Create a common read and write header struct The only difference is the write verifier field, but we can keep that for a little bit longer. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index e9cea3ab7cf9..43cfe11aa1a4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1592,7 +1592,7 @@ EXPORT_SYMBOL_GPL(pnfs_writehdr_free); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - struct nfs_write_header *whdr; + struct nfs_rw_header *whdr; struct nfs_pgio_header *hdr; int ret; @@ -1750,7 +1750,7 @@ EXPORT_SYMBOL_GPL(pnfs_readhdr_free); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { - struct nfs_read_header *rhdr; + struct nfs_rw_header *rhdr; struct nfs_pgio_header *hdr; int ret; -- cgit v1.2.3 From 00bfa30abe86982ce1929e9cabd703e5546106bd Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:29 -0400 Subject: NFS: Create a common pgio_alloc and pgio_release function These functions are identical for the read and write paths so they can be combined. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 43cfe11aa1a4..e192ba69a7d4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1536,7 +1536,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_write_mds(desc); desc->pg_recoalesce = 1; } - nfs_writedata_release(data); + nfs_pgio_data_release(data); } static enum pnfs_try_status @@ -1691,7 +1691,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_read_mds(desc); desc->pg_recoalesce = 1; } - nfs_readdata_release(data); + nfs_pgio_data_release(data); } /* -- cgit v1.2.3 From 4a0de55c565a36cac8422b76a948c4634a90781e Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:30 -0400 Subject: NFS: Create a common rw_header_alloc and rw_header_free function I create a new struct nfs_rw_ops to decide the differences between reads and writes. This struct will be set when initializing a new nfs_pgio_descriptor, and then passed on to the nfs_rw_header when a new header is allocated. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index e192ba69a7d4..54c84c128b2b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1585,7 +1585,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) { pnfs_put_lseg(hdr->lseg); - nfs_writehdr_free(hdr); + nfs_rw_header_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_writehdr_free); @@ -1596,7 +1596,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) struct nfs_pgio_header *hdr; int ret; - whdr = nfs_writehdr_alloc(); + whdr = nfs_rw_header_alloc(desc->pg_rw_ops); if (!whdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); pnfs_put_lseg(desc->pg_lseg); @@ -1743,7 +1743,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) { pnfs_put_lseg(hdr->lseg); - nfs_readhdr_free(hdr); + nfs_rw_header_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_readhdr_free); @@ -1754,7 +1754,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) struct nfs_pgio_header *hdr; int ret; - rhdr = nfs_readhdr_alloc(); + rhdr = nfs_rw_header_alloc(desc->pg_rw_ops); if (!rhdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); ret = -ENOMEM; -- cgit v1.2.3 From ef2c488c073f4f0b3a200745dd8d608c01d69c39 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:36 -0400 Subject: NFS: Create a generic_pgio function These functions are almost identical on both the read and write side. FLUSH_COND_STABLE will never be set for the read path, so leaving it in the generic code won't hurt anything. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 54c84c128b2b..0fe670189fd1 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1607,7 +1607,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); atomic_inc(&hdr->refcnt); - ret = nfs_generic_flush(desc, hdr); + ret = nfs_generic_pgio(desc, hdr); if (ret != 0) { pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; @@ -1766,7 +1766,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); atomic_inc(&hdr->refcnt); - ret = nfs_generic_pagein(desc, hdr); + ret = nfs_generic_pgio(desc, hdr); if (ret != 0) { pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; -- cgit v1.2.3 From b4fdac1a5150174df0847a45dc6612ce5ce3daeb Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:43 -0400 Subject: nfs: modify pg_test interface to return size_t This is a step toward allowing pg_test to inform the the coalescing code to reduce the size of requests so they may fit in whatever scheme the pg_test callback wants to define. For now, just return the size of the request if there is space, or 0 if there is not. This shouldn't change any behavior as it acts the same as when the pg_test functions returned bool. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0fe670189fd1..de6eb16f94d1 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1434,7 +1434,11 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); -bool +/* + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number + * of bytes (maximum @req->wb_bytes) that can be coalesced. + */ +size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { @@ -1455,8 +1459,10 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, * first byte that lies outside the pnfs_layout_range. FIXME? * */ - return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset, - pgio->pg_lseg->pls_range.length); + if (req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset, + pgio->pg_lseg->pls_range.length)) + return req->wb_bytes; + return 0; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); -- cgit v1.2.3 From 0f9c429eca07aca2764ccd751e2b48ba5397b936 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:51 -0400 Subject: nfs: chain calls to pg_test Now that pg_test can change the size of the request (by returning a non-zero size smaller than the request), pg_test functions that call other pg_test functions must return the minimum of the result - or 0 if any fail. Also clean up the logic of some pg_test functions so that all checks are for contitions where coalescing is not possible. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index de6eb16f94d1..354c53cd4095 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1442,8 +1442,12 @@ size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { - if (pgio->pg_lseg == NULL) - return nfs_generic_pg_test(pgio, prev, req); + unsigned int size; + + size = nfs_generic_pg_test(pgio, prev, req); + + if (!size) + return 0; /* * Test if a nfs_page is fully contained in the pnfs_layout_range. @@ -1459,10 +1463,11 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, * first byte that lies outside the pnfs_layout_range. FIXME? * */ - if (req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset, + if (req_offset(req) >= end_offset(pgio->pg_lseg->pls_range.offset, pgio->pg_lseg->pls_range.length)) - return req->wb_bytes; - return 0; + return 0; + + return min(size, req->wb_bytes); } EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); -- cgit v1.2.3 From 7f714720fac03383d687dbe39494cc96b845bd46 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:53 -0400 Subject: nfs: remove data list from pgio header Since the ability to split pages into subpage requests has been added, nfs_pgio_header->rpc_list only ever has one pgio data. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 41 +++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 26 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 354c53cd4095..6ef108b1d85f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1573,23 +1573,18 @@ pnfs_try_to_write_data(struct nfs_pgio_data *wdata, } static void -pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how) +pnfs_do_write(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr, int how) { - struct nfs_pgio_data *data; + struct nfs_pgio_data *data = hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; + enum pnfs_try_status trypnfs; desc->pg_lseg = NULL; - while (!list_empty(head)) { - enum pnfs_try_status trypnfs; - - data = list_first_entry(head, struct nfs_pgio_data, list); - list_del_init(&data->list); - - trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); - if (trypnfs == PNFS_NOT_ATTEMPTED) - pnfs_write_through_mds(desc, data); - } + trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); + if (trypnfs == PNFS_NOT_ATTEMPTED) + pnfs_write_through_mds(desc, data); pnfs_put_lseg(lseg); } @@ -1623,7 +1618,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else - pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); + pnfs_do_write(desc, hdr, desc->pg_ioflags); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); return ret; @@ -1731,23 +1726,17 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata, } static void -pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) +pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { - struct nfs_pgio_data *data; + struct nfs_pgio_data *data = hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; + enum pnfs_try_status trypnfs; desc->pg_lseg = NULL; - while (!list_empty(head)) { - enum pnfs_try_status trypnfs; - - data = list_first_entry(head, struct nfs_pgio_data, list); - list_del_init(&data->list); - - trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); - if (trypnfs == PNFS_NOT_ATTEMPTED) - pnfs_read_through_mds(desc, data); - } + trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); + if (trypnfs == PNFS_NOT_ATTEMPTED) + pnfs_read_through_mds(desc, data); pnfs_put_lseg(lseg); } @@ -1782,7 +1771,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else - pnfs_do_multiple_reads(desc, &hdr->rpc_list); + pnfs_do_read(desc, hdr); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); return ret; -- cgit v1.2.3 From 19b54848fee419f0bb35479e4ea98d9f2b985657 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:55 -0400 Subject: pnfs: allow non page aligned pnfs layout segments Remove alignment checks that would revert to MDS and change pg_test to return the max ammount left in the segment (or other pg_test call) up to size of passed request, or 0 if no space is left. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6ef108b1d85f..ce46a417e500 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1388,11 +1388,6 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r WARN_ON_ONCE(pgio->pg_lseg != NULL); - if (req->wb_offset != req->wb_pgbase) { - nfs_pageio_reset_read_mds(pgio); - return; - } - if (pgio->pg_dreq == NULL) rd_size = i_size_read(pgio->pg_inode) - req_offset(req); else @@ -1417,11 +1412,6 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, { WARN_ON_ONCE(pgio->pg_lseg != NULL); - if (req->wb_offset != req->wb_pgbase) { - nfs_pageio_reset_write_mds(pgio); - return; - } - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, req_offset(req), @@ -1443,9 +1433,9 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { unsigned int size; + u64 end; size = nfs_generic_pg_test(pgio, prev, req); - if (!size) return 0; @@ -1463,11 +1453,16 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, * first byte that lies outside the pnfs_layout_range. FIXME? * */ - if (req_offset(req) >= end_offset(pgio->pg_lseg->pls_range.offset, - pgio->pg_lseg->pls_range.length)) - return 0; + if (pgio->pg_lseg) { + end = end_offset(pgio->pg_lseg->pls_range.offset, + pgio->pg_lseg->pls_range.length); + WARN_ON_ONCE(req_offset(req) > end); + if (req_offset(req) >= end) + return 0; + size = min((unsigned int)(end - req_offset(req)), size); + } - return min(size, req->wb_bytes); + return size; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); -- cgit v1.2.3 From c5e20cb700c0e36fb5499093b96e80350c6eb48e Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Mon, 9 Jun 2014 17:47:26 -0400 Subject: pnfs: fix lockup caused by pnfs_generic_pg_test end_offset and req_offset both return u64 - avoid casting to u32 until it's needed, when it's less than the (u32) size returned by nfs_generic_pg_test. Also, fix the comments in pnfs_generic_pg_test. Running the cthon04 special tests caused this lockup in the "write/read at 2GB, 4GB edges" test when running against a file layout server: BUG: soft lockup - CPU#0 stuck for 22s! [bigfile2:823] Modules linked in: nfs_layout_nfsv41_files rpcsec_gss_krb5 nfsv4 nfs fscache ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_mangle ip6table_filter ip6_tables iptable_nat nf_nat_ipv4 nf_nat iptable_mangle ppdev crc32c_intel aesni_intel aes_x86_64 glue_helper lrw gf128mul ablk_helper cryptd serio_raw e1000 shpchp i2c_piix4 i2c_core parport_pc parport nfsd auth_rpcgss oid_registry exportfs nfs_acl lockd sunrpc btrfs xor zlib_deflate raid6_pq mptspi scsi_transport_spi mptscsih mptbase ata_generic floppy autofs4 irq event stamp: 205958 hardirqs last enabled at (205957): [] restore_args+0x0/0x30 hardirqs last disabled at (205958): [] apic_timer_interrupt+0x6a/0x80 softirqs last enabled at (205956): [] __do_softirq+0x1ea/0x2ab softirqs last disabled at (205951): [] irq_exit+0x44/0x9a CPU: 0 PID: 823 Comm: bigfile2 Not tainted 3.15.0-rc1-branch-pgio_plus+ #3 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/31/2013 task: ffff8800792ec480 ti: ffff880078c4e000 task.ti: ffff880078c4e000 RIP: 0010:[] [] nfs_page_group_unlock+0x3e/0x4b [nfs] RSP: 0018:ffff880078c4fab0 EFLAGS: 00000202 RAX: 0000000000000fff RBX: ffff88006bf83300 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff88006bf83300 RBP: ffff880078c4fab8 R08: 0000000000000001 R09: 0000000000000000 R10: ffffffff8249840c R11: 0000000000000000 R12: 0000000000000035 R13: ffff88007ffc72d8 R14: 0000000000000001 R15: 0000000000000000 FS: 00007f45f11b7740(0000) GS:ffff88007f200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3a8cb632d0 CR3: 000000007931c000 CR4: 00000000001407f0 Stack: ffff88006bf832c0 ffff880078c4fb00 ffffffffa02cec22 ffff880078c4fad8 00000fff810f9d99 ffff880078c4fca0 ffff88006bf832c0 ffff88006bf832c0 ffff880078c4fca0 ffff880078c4fd60 ffff880078c4fb28 ffffffffa02cee34 Call Trace: [] __nfs_pageio_add_request+0x298/0x34f [nfs] [] nfs_pageio_add_request+0x1f/0x42 [nfs] [] nfs_do_writepage+0x1b5/0x1e4 [nfs] [] nfs_writepages_callback+0x13/0x25 [nfs] [] ? nfs_do_writepage+0x1e4/0x1e4 [nfs] [] write_cache_pages+0x254/0x37f [] ? nfs_do_writepage+0x1e4/0x1e4 [nfs] [] ? printk+0x54/0x56 [] ? __set_page_dirty_nobuffers+0x22/0xe9 [] ? put_rpccred+0x38/0x101 [sunrpc] [] nfs_writepages+0xb4/0xf8 [nfs] [] do_writepages+0x21/0x2f [] __filemap_fdatawrite_range+0x55/0x57 [] filemap_write_and_wait_range+0x2d/0x5b [] nfs4_file_fsync+0x3a/0x98 [nfsv4] [] vfs_fsync_range+0x18/0x20 [] generic_file_aio_write+0xa7/0xbd [] nfs_file_write+0xf0/0x170 [nfs] [] do_sync_write+0x59/0x78 [] vfs_write+0xab/0x107 [] SyS_write+0x49/0x7f [] system_call_fastpath+0x16/0x1b Reported-by: Anna Schumaker Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ce46a417e500..ee60c42b72b3 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1433,33 +1433,37 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { unsigned int size; - u64 end; + u64 seg_end, req_start, seg_left; size = nfs_generic_pg_test(pgio, prev, req); if (!size) return 0; /* - * Test if a nfs_page is fully contained in the pnfs_layout_range. - * Note that this test makes several assumptions: - * - that the previous nfs_page in the struct nfs_pageio_descriptor - * is known to lie within the range. - * - that the nfs_page being tested is known to be contiguous with the - * previous nfs_page. - * - Layout ranges are page aligned, so we only have to test the - * start offset of the request. + * 'size' contains the number of bytes left in the current page (up + * to the original size asked for in @req->wb_bytes). + * + * Calculate how many bytes are left in the layout segment + * and if there are less bytes than 'size', return that instead. * * Please also note that 'end_offset' is actually the offset of the * first byte that lies outside the pnfs_layout_range. FIXME? * */ if (pgio->pg_lseg) { - end = end_offset(pgio->pg_lseg->pls_range.offset, - pgio->pg_lseg->pls_range.length); - WARN_ON_ONCE(req_offset(req) > end); - if (req_offset(req) >= end) + seg_end = end_offset(pgio->pg_lseg->pls_range.offset, + pgio->pg_lseg->pls_range.length); + req_start = req_offset(req); + WARN_ON_ONCE(req_start > seg_end); + /* start of request is past the last byte of this segment */ + if (req_start >= seg_end) return 0; - size = min((unsigned int)(end - req_offset(req)), size); + + /* adjust 'size' iff there are fewer bytes left in the + * segment than what nfs_generic_pg_test returned */ + seg_left = seg_end - req_start; + if (seg_left < size) + size = (unsigned int)seg_left; } return size; -- cgit v1.2.3