diff options
author | Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | 2011-08-16 10:50:10 +0530 |
---|---|---|
committer | Eric Van Hensbergen <ericvh@gmail.com> | 2011-10-24 11:13:11 -0500 |
commit | abfa034e4b8ed0046fa589769e9840af645bc4ba (patch) | |
tree | 146195e891a2fd63e3fc91f803547abb904b2087 /net/9p/client.c | |
parent | c3b92c8787367a8bb53d57d9789b558f1295cc96 (diff) |
fs/9p: Update zero-copy implementation in 9p
* remove lot of update to different data structure
* add a seperate callback for zero copy request.
* above makes non zero copy code path simpler
* remove conditionalizing TREAD/TREADDIR/TWRITE in the zero copy path
* Fix the dotu p9_check_errors with zero copy. Add sufficient doc around
* Add support for both in and output buffers in zero copy callback
* pin and unpin pages in the same context
* use helpers instead of defining page offset and rest of page ourself
* Fix mem leak in p9_check_errors
* Remove 'E' and 'F' in p9pdu_vwritef
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Diffstat (limited to 'net/9p/client.c')
-rw-r--r-- | net/9p/client.c | 391 |
1 files changed, 291 insertions, 100 deletions
diff --git a/net/9p/client.c b/net/9p/client.c index 0505a03c374c..305a4e719b03 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -203,11 +203,12 @@ free_and_return: * */ -static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) +static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag, int max_size) { unsigned long flags; int row, col; struct p9_req_t *req; + int alloc_msize = min(c->msize, max_size); /* This looks up the original request by tag so we know which * buffer to read the data into */ @@ -245,23 +246,12 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) return ERR_PTR(-ENOMEM); } init_waitqueue_head(req->wq); - if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) { - int alloc_msize = min(c->msize, 4096); - req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, - GFP_NOFS); - req->tc->capacity = alloc_msize; - req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, - GFP_NOFS); - req->rc->capacity = alloc_msize; - } else { - req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_NOFS); - req->tc->capacity = c->msize; - req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_NOFS); - req->rc->capacity = c->msize; - } + req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, + GFP_NOFS); + req->tc->capacity = alloc_msize; + req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, + GFP_NOFS); + req->rc->capacity = alloc_msize; if ((!req->tc) || (!req->rc)) { printk(KERN_ERR "Couldn't grow tag array\n"); kfree(req->tc); @@ -485,27 +475,8 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) if (!p9_is_proto_dotl(c)) { char *ename; - - if (req->tc->pbuf_size) { - /* Handle user buffers */ - size_t len = req->rc->size - req->rc->offset; - if (req->tc->pubuf) { - /* User Buffer */ - err = copy_from_user( - &req->rc->sdata[req->rc->offset], - req->tc->pubuf, len); - if (err) { - err = -EFAULT; - goto out_err; - } - } else { - /* Kernel Buffer */ - memmove(&req->rc->sdata[req->rc->offset], - req->tc->pkbuf, len); - } - } err = p9pdu_readf(req->rc, c->proto_version, "s?d", - &ename, &ecode); + &ename, &ecode); if (err) goto out_err; @@ -515,11 +486,10 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) if (!err || !IS_ERR_VALUE(err)) { err = p9_errstr2errno(ename, strlen(ename)); - P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, - ename); - - kfree(ename); + P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", + -ecode, ename); } + kfree(ename); } else { err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); err = -ecode; @@ -527,7 +497,6 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); } - return err; out_err: @@ -536,6 +505,110 @@ out_err: return err; } +/** + * p9_check_zc_errors - check 9p packet for error return and process it + * @c: current client instance + * @req: request to parse and check for error conditions + * @in_hdrlen: Size of response protocol buffer. + * + * returns error code if one is discovered, otherwise returns 0 + * + * this will have to be more complicated if we have multiple + * error packet types + */ + +static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, + char *uidata, int in_hdrlen, int kern_buf) +{ + int err; + int ecode; + int8_t type; + char *ename = NULL; + + err = p9_parse_header(req->rc, NULL, &type, NULL, 0); + if (err) { + P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); + return err; + } + + if (type != P9_RERROR && type != P9_RLERROR) + return 0; + + if (!p9_is_proto_dotl(c)) { + /* Error is reported in string format */ + uint16_t len; + /* 7 = header size for RERROR, 2 is the size of string len; */ + int inline_len = in_hdrlen - (7 + 2); + + /* Read the size of error string */ + err = p9pdu_readf(req->rc, c->proto_version, "w", &len); + if (err) + goto out_err; + + ename = kmalloc(len + 1, GFP_NOFS); + if (!ename) { + err = -ENOMEM; + goto out_err; + } + if (len <= inline_len) { + /* We have error in protocol buffer itself */ + if (pdu_read(req->rc, ename, len)) { + err = -EFAULT; + goto out_free; + + } + } else { + /* + * Part of the data is in user space buffer. + */ + if (pdu_read(req->rc, ename, inline_len)) { + err = -EFAULT; + goto out_free; + + } + if (kern_buf) { + memcpy(ename + inline_len, uidata, + len - inline_len); + } else { + err = copy_from_user(ename + inline_len, + uidata, len - inline_len); + if (err) { + err = -EFAULT; + goto out_free; + } + } + } + ename[len] = 0; + if (p9_is_proto_dotu(c)) { + /* For dotu we also have error code */ + err = p9pdu_readf(req->rc, + c->proto_version, "d", &ecode); + if (err) + goto out_free; + err = -ecode; + } + if (!err || !IS_ERR_VALUE(err)) { + err = p9_errstr2errno(ename, strlen(ename)); + + P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", + -ecode, ename); + } + kfree(ename); + } else { + err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); + err = -ecode; + + P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + } + return err; + +out_free: + kfree(ename); +out_err: + P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); + return err; +} + static struct p9_req_t * p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); @@ -579,23 +652,12 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) return 0; } -/** - * p9_client_rpc - issue a request and wait for a response - * @c: client session - * @type: type of request - * @fmt: protocol format string (see protocol.c) - * - * Returns request structure (which client must free using p9_free_req) - */ - -static struct p9_req_t * -p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) +static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, + int8_t type, int req_size, + const char *fmt, va_list ap) { - va_list ap; int tag, err; struct p9_req_t *req; - unsigned long flags; - int sigpending; P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); @@ -607,12 +669,6 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) if ((c->status == BeginDisconnect) && (type != P9_TCLUNK)) return ERR_PTR(-EIO); - if (signal_pending(current)) { - sigpending = 1; - clear_thread_flag(TIF_SIGPENDING); - } else - sigpending = 0; - tag = P9_NOTAG; if (type != P9_TVERSION) { tag = p9_idpool_get(c->tagpool); @@ -620,18 +676,50 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) return ERR_PTR(-ENOMEM); } - req = p9_tag_alloc(c, tag); + req = p9_tag_alloc(c, tag, req_size); if (IS_ERR(req)) return req; /* marshall the data */ p9pdu_prepare(req->tc, tag, type); - va_start(ap, fmt); err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); - va_end(ap); if (err) goto reterr; p9pdu_finalize(req->tc); + return req; +reterr: + p9_free_req(c, req); + return ERR_PTR(err); +} + +/** + * p9_client_rpc - issue a request and wait for a response + * @c: client session + * @type: type of request + * @fmt: protocol format string (see protocol.c) + * + * Returns request structure (which client must free using p9_free_req) + */ + +static struct p9_req_t * +p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) +{ + va_list ap; + int sigpending, err; + unsigned long flags; + struct p9_req_t *req; + + va_start(ap, fmt); + req = p9_client_prepare_req(c, type, c->msize, fmt, ap); + va_end(ap); + if (IS_ERR(req)) + return req; + + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } else + sigpending = 0; err = c->trans_mod->request(c, req); if (err < 0) { @@ -639,18 +727,14 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) c->status = Disconnected; goto reterr; } - - P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag); + /* Wait for the response */ err = wait_event_interruptible(*req->wq, - req->status >= REQ_STATUS_RCVD); - P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n", - req->wq, tag, err); + req->status >= REQ_STATUS_RCVD); if (req->status == REQ_STATUS_ERROR) { P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; } - if ((err == -ERESTARTSYS) && (c->status == Connected)) { P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); sigpending = 1; @@ -663,13 +747,11 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) if (req->status == REQ_STATUS_RCVD) err = 0; } - if (sigpending) { spin_lock_irqsave(¤t->sighand->siglock, flags); recalc_sigpending(); spin_unlock_irqrestore(¤t->sighand->siglock, flags); } - if (err < 0) goto reterr; @@ -678,7 +760,92 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type); return req; } +reterr: + P9_DPRINTK(P9_DEBUG_MUX, + "exit: client %p op %d error: %d\n", c, type, err); + p9_free_req(c, req); + return ERR_PTR(err); +} + +/** + * p9_client_zc_rpc - issue a request and wait for a response + * @c: client session + * @type: type of request + * @uidata: user bffer that should be ued for zero copy read + * @uodata: user buffer that shoud be user for zero copy write + * @inlen: read buffer size + * @olen: write buffer size + * @hdrlen: reader header size, This is the size of response protocol data + * @fmt: protocol format string (see protocol.c) + * + * Returns request structure (which client must free using p9_free_req) + */ +static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, + char *uidata, char *uodata, + int inlen, int olen, int in_hdrlen, + int kern_buf, const char *fmt, ...) +{ + va_list ap; + int sigpending, err; + unsigned long flags; + struct p9_req_t *req; + + va_start(ap, fmt); + /* + * We allocate a inline protocol data of only 4k bytes. + * The actual content is passed in zero-copy fashion. + */ + req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap); + va_end(ap); + if (IS_ERR(req)) + return req; + + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } else + sigpending = 0; + + /* If we are called with KERNEL_DS force kern_buf */ + if (segment_eq(get_fs(), KERNEL_DS)) + kern_buf = 1; + + err = c->trans_mod->zc_request(c, req, uidata, uodata, + inlen, olen, in_hdrlen, kern_buf); + if (err < 0) { + if (err == -EIO) + c->status = Disconnected; + goto reterr; + } + if (req->status == REQ_STATUS_ERROR) { + P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); + err = req->t_err; + } + if ((err == -ERESTARTSYS) && (c->status == Connected)) { + P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + if (c->trans_mod->cancel(c, req)) + p9_client_flush(c, req); + + /* if we received the response anyway, don't signal error */ + if (req->status == REQ_STATUS_RCVD) + err = 0; + } + if (sigpending) { + spin_lock_irqsave(¤t->sighand->siglock, flags); + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); + } + if (err < 0) + goto reterr; + + err = p9_check_zc_errors(c, req, uidata, in_hdrlen, kern_buf); + if (!err) { + P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type); + return req; + } reterr: P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type, err); @@ -1330,13 +1497,15 @@ int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, u32 count) { - int err, rsize; - struct p9_client *clnt; - struct p9_req_t *req; char *dataptr; + int kernel_buf = 0; + struct p9_req_t *req; + struct p9_client *clnt; + int err, rsize, non_zc = 0; + - P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid, - (long long unsigned) offset, count); + P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", + fid->fid, (long long unsigned) offset, count); err = 0; clnt = fid->clnt; @@ -1348,13 +1517,24 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, rsize = count; /* Don't bother zerocopy for small IO (< 1024) */ - if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { - req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset, - rsize, data, udata); + if (clnt->trans_mod->zc_request && rsize > 1024) { + char *indata; + if (data) { + kernel_buf = 1; + indata = data; + } else + indata = (char *)udata; + /* + * response header len is 11 + * PDU Header(7) + IO Size (4) + */ + req = p9_client_zc_rpc(clnt, P9_TREAD, indata, NULL, rsize, 0, + 11, kernel_buf, "dqd", fid->fid, + offset, rsize); } else { + non_zc = 1; req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, - rsize); + rsize); } if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1370,7 +1550,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); P9_DUMP_PKT(1, req->rc); - if (!req->tc->pbuf_size) { + if (non_zc) { if (data) { memmove(data, dataptr, count); } else { @@ -1396,6 +1576,7 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, u64 offset, u32 count) { int err, rsize; + int kernel_buf = 0; struct p9_client *clnt; struct p9_req_t *req; @@ -1411,19 +1592,24 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, if (count < rsize) rsize = count; - /* Don't bother zerocopy form small IO (< 1024) */ - if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { - req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset, - rsize, data, udata); + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && rsize > 1024) { + char *odata; + if (data) { + kernel_buf = 1; + odata = data; + } else + odata = (char *)udata; + req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, odata, 0, rsize, + P9_ZC_HDR_SZ, kernel_buf, "dqd", + fid->fid, offset, rsize); } else { - if (data) req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, - offset, rsize, data); + offset, rsize, data); else req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, - offset, rsize, udata); + offset, rsize, udata); } if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1824,7 +2010,7 @@ EXPORT_SYMBOL_GPL(p9_client_xattrcreate); int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) { - int err, rsize; + int err, rsize, non_zc = 0; struct p9_client *clnt; struct p9_req_t *req; char *dataptr; @@ -1842,13 +2028,18 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) if (count < rsize) rsize = count; - if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) { - req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid, - offset, rsize, data); + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && rsize > 1024) { + /* + * response header len is 11 + * PDU Header(7) + IO Size (4) + */ + req = p9_client_zc_rpc(clnt, P9_TREADDIR, data, NULL, rsize, 0, + 11, 1, "dqd", fid->fid, offset, rsize); } else { + non_zc = 1; req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, - offset, rsize); + offset, rsize); } if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1863,7 +2054,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); - if (!req->tc->pbuf_size && data) + if (non_zc) memmove(data, dataptr, count); p9_free_req(clnt, req); |