diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-04 19:55:11 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-04 19:55:11 -0700 |
commit | 4d4700707c0d4be0efc968989fb1cd01c60c0a35 (patch) | |
tree | 478453a4ae9453bd8d26ffc3df6eedcc30799a43 /net | |
parent | 7e20ef030dde0e52dd5a57220ee82fa9facbea4e (diff) | |
parent | 84dde76c4a2d99ed2d7de6ec82c53b56620900a3 (diff) |
Merge git://git.linux-nfs.org/pub/linux/nfs-2.6
* git://git.linux-nfs.org/pub/linux/nfs-2.6: (28 commits)
NFS: Fix a compile glitch on 64-bit systems
NFS: Clean up nfs_create_request comments
spkm3: initialize hash
spkm3: remove bad kfree, unnecessary export
spkm3: fix spkm3's use of hmac
NFS4: invalidate cached acl on setacl
NFS: Fix directory caching problem - with test case and patch.
NFS: Set meaningful value for fattr->time_start in readdirplus results.
NFS: Added support to turn off the NFSv3 READDIRPLUS RPC.
SUNRPC: RPC client should retry with different versions of rpcbind
SUNRPC: remove old portmapper
NFS: switch NFSROOT to use new rpcbind client
SUNRPC: switch the RPC server to use the new rpcbind registration API
SUNRPC: switch socket-based RPC transports to use rpcbind
SUNRPC: introduce rpcbind: replacement for in-kernel portmapper
SUNRPC: Eliminate side effects from rpc_malloc
SUNRPC: RPC buffer size estimates are too large
NLM: Shrink the maximum request size of NLM4 requests
NFS: Use pgoff_t in structures and functions that pass page cache offsets
NFS: Clean up nfs_sync_mapping_wait()
...
Diffstat (limited to 'net')
-rw-r--r-- | net/sunrpc/Makefile | 2 | ||||
-rw-r--r-- | net/sunrpc/auth_gss/gss_spkm3_seal.c | 13 | ||||
-rw-r--r-- | net/sunrpc/clnt.c | 69 | ||||
-rw-r--r-- | net/sunrpc/pmap_clnt.c | 383 | ||||
-rw-r--r-- | net/sunrpc/rpcb_clnt.c | 625 | ||||
-rw-r--r-- | net/sunrpc/sched.c | 65 | ||||
-rw-r--r-- | net/sunrpc/svc.c | 2 | ||||
-rw-r--r-- | net/sunrpc/xprt.c | 4 | ||||
-rw-r--r-- | net/sunrpc/xprtsock.c | 4 |
9 files changed, 714 insertions, 453 deletions
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index cdcab9ca4c60..8ebfc4db7f51 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ auth.o auth_null.o auth_unix.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ - pmap_clnt.o timer.o xdr.o \ + rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o sunrpc-$(CONFIG_PROC_FS) += stats.o sunrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c index 104cbf4f769f..d158635de6c0 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c @@ -123,9 +123,6 @@ spkm3_make_token(struct spkm3_ctx *ctx, return GSS_S_COMPLETE; out_err: - if (md5cksum.data) - kfree(md5cksum.data); - token->data = NULL; token->len = 0; return GSS_S_FAILURE; @@ -152,7 +149,7 @@ make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header, switch (cksumtype) { case CKSUMTYPE_HMAC_MD5: - cksumname = "md5"; + cksumname = "hmac(md5)"; break; default: dprintk("RPC: spkm3_make_checksum:" @@ -172,8 +169,12 @@ make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header, if (err) goto out; + err = crypto_hash_init(&desc); + if (err) + goto out; + sg_set_buf(sg, header, hdrlen); - crypto_hash_update(&desc, sg, 1); + crypto_hash_update(&desc, sg, sg->length); xdr_process_buf(body, body_offset, body->len - body_offset, spkm3_checksummer, &desc); @@ -184,5 +185,3 @@ out: return err ? GSS_S_FAILURE : 0; } - -EXPORT_SYMBOL(make_spkm3_checksum); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 396cdbe249d1..d8fbee40a19c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -36,8 +36,6 @@ #include <linux/sunrpc/metrics.h> -#define RPC_SLACK_SPACE (1024) /* total overkill */ - #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_CALL #endif @@ -747,21 +745,38 @@ call_reserveresult(struct rpc_task *task) static void call_allocate(struct rpc_task *task) { + unsigned int slack = task->tk_auth->au_cslack; struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = task->tk_xprt; - unsigned int bufsiz; + struct rpc_procinfo *proc = task->tk_msg.rpc_proc; dprint_status(task); + task->tk_status = 0; task->tk_action = call_bind; + if (req->rq_buffer) return; - /* FIXME: compute buffer requirements more exactly using - * auth->au_wslack */ - bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE; + if (proc->p_proc != 0) { + BUG_ON(proc->p_arglen == 0); + if (proc->p_decode != NULL) + BUG_ON(proc->p_replen == 0); + } - if (xprt->ops->buf_alloc(task, bufsiz << 1) != NULL) + /* + * Calculate the size (in quads) of the RPC call + * and reply headers, and convert both values + * to byte sizes. + */ + req->rq_callsize = RPC_CALLHDRSIZE + (slack << 1) + proc->p_arglen; + req->rq_callsize <<= 2; + req->rq_rcvsize = RPC_REPHDRSIZE + slack + proc->p_replen; + req->rq_rcvsize <<= 2; + + req->rq_buffer = xprt->ops->buf_alloc(task, + req->rq_callsize + req->rq_rcvsize); + if (req->rq_buffer != NULL) return; dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); @@ -788,6 +803,17 @@ rpc_task_force_reencode(struct rpc_task *task) task->tk_rqstp->rq_snd_buf.len = 0; } +static inline void +rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) +{ + buf->head[0].iov_base = start; + buf->head[0].iov_len = len; + buf->tail[0].iov_len = 0; + buf->page_len = 0; + buf->len = 0; + buf->buflen = len; +} + /* * 3. Encode arguments of an RPC call */ @@ -795,28 +821,17 @@ static void call_encode(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - struct xdr_buf *sndbuf = &req->rq_snd_buf; - struct xdr_buf *rcvbuf = &req->rq_rcv_buf; - unsigned int bufsiz; kxdrproc_t encode; __be32 *p; dprint_status(task); - /* Default buffer setup */ - bufsiz = req->rq_bufsize >> 1; - sndbuf->head[0].iov_base = (void *)req->rq_buffer; - sndbuf->head[0].iov_len = bufsiz; - sndbuf->tail[0].iov_len = 0; - sndbuf->page_len = 0; - sndbuf->len = 0; - sndbuf->buflen = bufsiz; - rcvbuf->head[0].iov_base = (void *)((char *)req->rq_buffer + bufsiz); - rcvbuf->head[0].iov_len = bufsiz; - rcvbuf->tail[0].iov_len = 0; - rcvbuf->page_len = 0; - rcvbuf->len = 0; - rcvbuf->buflen = bufsiz; + rpc_xdr_buf_init(&req->rq_snd_buf, + req->rq_buffer, + req->rq_callsize); + rpc_xdr_buf_init(&req->rq_rcv_buf, + (char *)req->rq_buffer + req->rq_callsize, + req->rq_rcvsize); /* Encode header and provided arguments */ encode = task->tk_msg.rpc_proc->p_encode; @@ -887,9 +902,11 @@ call_bind_status(struct rpc_task *task) task->tk_pid); break; case -EPROTONOSUPPORT: - dprintk("RPC: %5u remote rpcbind version 2 unavailable\n", + dprintk("RPC: %5u remote rpcbind version unavailable, retrying\n", task->tk_pid); - break; + task->tk_status = 0; + task->tk_action = call_bind; + return; default: dprintk("RPC: %5u unrecognized rpcbind error (%d)\n", task->tk_pid, -task->tk_status); diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c deleted file mode 100644 index d9f765344589..000000000000 --- a/net/sunrpc/pmap_clnt.c +++ /dev/null @@ -1,383 +0,0 @@ -/* - * linux/net/sunrpc/pmap_clnt.c - * - * In-kernel RPC portmapper client. - * - * Portmapper supports version 2 of the rpcbind protocol (RFC 1833). - * - * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> - */ - -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/uio.h> -#include <linux/in.h> -#include <linux/sunrpc/clnt.h> -#include <linux/sunrpc/sched.h> - -#ifdef RPC_DEBUG -# define RPCDBG_FACILITY RPCDBG_PMAP -#endif - -#define PMAP_SET 1 -#define PMAP_UNSET 2 -#define PMAP_GETPORT 3 - -struct portmap_args { - u32 pm_prog; - u32 pm_vers; - u32 pm_prot; - unsigned short pm_port; - struct rpc_xprt * pm_xprt; -}; - -static struct rpc_procinfo pmap_procedures[]; -static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int, int); -static void pmap_getport_done(struct rpc_task *, void *); -static struct rpc_program pmap_program; - -static void pmap_getport_prepare(struct rpc_task *task, void *calldata) -{ - struct portmap_args *map = calldata; - struct rpc_message msg = { - .rpc_proc = &pmap_procedures[PMAP_GETPORT], - .rpc_argp = map, - .rpc_resp = &map->pm_port, - }; - - rpc_call_setup(task, &msg, 0); -} - -static inline struct portmap_args *pmap_map_alloc(void) -{ - return kmalloc(sizeof(struct portmap_args), GFP_NOFS); -} - -static inline void pmap_map_free(struct portmap_args *map) -{ - kfree(map); -} - -static void pmap_map_release(void *data) -{ - struct portmap_args *map = data; - - xprt_put(map->pm_xprt); - pmap_map_free(map); -} - -static const struct rpc_call_ops pmap_getport_ops = { - .rpc_call_prepare = pmap_getport_prepare, - .rpc_call_done = pmap_getport_done, - .rpc_release = pmap_map_release, -}; - -static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt, int status) -{ - xprt_clear_binding(xprt); - rpc_wake_up_status(&xprt->binding, status); -} - -/** - * rpc_getport - obtain the port for a given RPC service on a given host - * @task: task that is waiting for portmapper request - * - * This one can be called for an ongoing RPC request, and can be used in - * an async (rpciod) context. - */ -void rpc_getport(struct rpc_task *task) -{ - struct rpc_clnt *clnt = task->tk_client; - struct rpc_xprt *xprt = task->tk_xprt; - struct sockaddr_in addr; - struct portmap_args *map; - struct rpc_clnt *pmap_clnt; - struct rpc_task *child; - int status; - - dprintk("RPC: %5u rpc_getport(%s, %u, %u, %d)\n", - task->tk_pid, clnt->cl_server, - clnt->cl_prog, clnt->cl_vers, xprt->prot); - - /* Autobind on cloned rpc clients is discouraged */ - BUG_ON(clnt->cl_parent != clnt); - - status = -EACCES; /* tell caller to check again */ - if (xprt_test_and_set_binding(xprt)) - goto bailout_nowake; - - /* Put self on queue before sending rpcbind request, in case - * pmap_getport_done completes before we return from rpc_run_task */ - rpc_sleep_on(&xprt->binding, task, NULL, NULL); - - /* Someone else may have bound if we slept */ - status = 0; - if (xprt_bound(xprt)) - goto bailout_nofree; - - status = -ENOMEM; - map = pmap_map_alloc(); - if (!map) - goto bailout_nofree; - map->pm_prog = clnt->cl_prog; - map->pm_vers = clnt->cl_vers; - map->pm_prot = xprt->prot; - map->pm_port = 0; - map->pm_xprt = xprt_get(xprt); - - rpc_peeraddr(clnt, (struct sockaddr *) &addr, sizeof(addr)); - pmap_clnt = pmap_create(clnt->cl_server, &addr, map->pm_prot, 0); - status = PTR_ERR(pmap_clnt); - if (IS_ERR(pmap_clnt)) - goto bailout; - - status = -EIO; - child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map); - if (IS_ERR(child)) - goto bailout_nofree; - rpc_put_task(child); - - task->tk_xprt->stat.bind_count++; - return; - -bailout: - pmap_map_free(map); - xprt_put(xprt); -bailout_nofree: - pmap_wake_portmap_waiters(xprt, status); -bailout_nowake: - task->tk_status = status; -} - -#ifdef CONFIG_ROOT_NFS -/** - * rpc_getport_external - obtain the port for a given RPC service on a given host - * @sin: address of remote peer - * @prog: RPC program number to bind - * @vers: RPC version number to bind - * @prot: transport protocol to use to make this request - * - * This one is called from outside the RPC client in a synchronous task context. - */ -int rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) -{ - struct portmap_args map = { - .pm_prog = prog, - .pm_vers = vers, - .pm_prot = prot, - .pm_port = 0 - }; - struct rpc_message msg = { - .rpc_proc = &pmap_procedures[PMAP_GETPORT], - .rpc_argp = &map, - .rpc_resp = &map.pm_port, - }; - struct rpc_clnt *pmap_clnt; - char hostname[32]; - int status; - - dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %u, %u, %d)\n", - NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); - - sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); - pmap_clnt = pmap_create(hostname, sin, prot, 0); - if (IS_ERR(pmap_clnt)) - return PTR_ERR(pmap_clnt); - - /* Setup the call info struct */ - status = rpc_call_sync(pmap_clnt, &msg, 0); - - if (status >= 0) { - if (map.pm_port != 0) - return map.pm_port; - status = -EACCES; - } - return status; -} -#endif - -/* - * Portmapper child task invokes this callback via tk_exit. - */ -static void pmap_getport_done(struct rpc_task *child, void *data) -{ - struct portmap_args *map = data; - struct rpc_xprt *xprt = map->pm_xprt; - int status = child->tk_status; - - if (status < 0) { - /* Portmapper not available */ - xprt->ops->set_port(xprt, 0); - } else if (map->pm_port == 0) { - /* Requested RPC service wasn't registered */ - xprt->ops->set_port(xprt, 0); - status = -EACCES; - } else { - /* Succeeded */ - xprt->ops->set_port(xprt, map->pm_port); - xprt_set_bound(xprt); - status = 0; - } - - dprintk("RPC: %5u pmap_getport_done(status %d, port %u)\n", - child->tk_pid, status, map->pm_port); - - pmap_wake_portmap_waiters(xprt, status); -} - -/** - * rpc_register - set or unset a port registration with the local portmapper - * @prog: RPC program number to bind - * @vers: RPC version number to bind - * @prot: transport protocol to use to make this request - * @port: port value to register - * @okay: result code - * - * port == 0 means unregister, port != 0 means register. - */ -int rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) -{ - struct sockaddr_in sin = { - .sin_family = AF_INET, - .sin_addr.s_addr = htonl(INADDR_LOOPBACK), - }; - struct portmap_args map = { - .pm_prog = prog, - .pm_vers = vers, - .pm_prot = prot, - .pm_port = port, - }; - struct rpc_message msg = { - .rpc_proc = &pmap_procedures[port ? PMAP_SET : PMAP_UNSET], - .rpc_argp = &map, - .rpc_resp = okay, - }; - struct rpc_clnt *pmap_clnt; - int error = 0; - - dprintk("RPC: registering (%u, %u, %d, %u) with portmapper.\n", - prog, vers, prot, port); - - pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1); - if (IS_ERR(pmap_clnt)) { - error = PTR_ERR(pmap_clnt); - dprintk("RPC: couldn't create pmap client. Error = %d\n", - error); - return error; - } - - error = rpc_call_sync(pmap_clnt, &msg, 0); - - if (error < 0) { - printk(KERN_WARNING - "RPC: failed to contact portmap (errno %d).\n", - error); - } - dprintk("RPC: registration status %d/%d\n", error, *okay); - - /* Client deleted automatically because cl_oneshot == 1 */ - return error; -} - -static struct rpc_clnt *pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged) -{ - struct rpc_create_args args = { - .protocol = proto, - .address = (struct sockaddr *)srvaddr, - .addrsize = sizeof(*srvaddr), - .servername = hostname, - .program = &pmap_program, - .version = RPC_PMAP_VERSION, - .authflavor = RPC_AUTH_UNIX, - .flags = (RPC_CLNT_CREATE_ONESHOT | - RPC_CLNT_CREATE_NOPING), - }; - - srvaddr->sin_port = htons(RPC_PMAP_PORT); - if (!privileged) - args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; - return rpc_create(&args); -} - -/* - * XDR encode/decode functions for PMAP - */ -static int xdr_encode_mapping(struct rpc_rqst *req, __be32 *p, struct portmap_args *map) -{ - dprintk("RPC: xdr_encode_mapping(%u, %u, %u, %u)\n", - map->pm_prog, map->pm_vers, - map->pm_prot, map->pm_port); - *p++ = htonl(map->pm_prog); - *p++ = htonl(map->pm_vers); - *p++ = htonl(map->pm_prot); - *p++ = htonl(map->pm_port); - - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - return 0; -} - -static int xdr_decode_port(struct rpc_rqst *req, __be32 *p, unsigned short *portp) -{ - *portp = (unsigned short) ntohl(*p++); - return 0; -} - -static int xdr_decode_bool(struct rpc_rqst *req, __be32 *p, unsigned int *boolp) -{ - *boolp = (unsigned int) ntohl(*p++); - return 0; -} - -static struct rpc_procinfo pmap_procedures[] = { -[PMAP_SET] = { - .p_proc = PMAP_SET, - .p_encode = (kxdrproc_t) xdr_encode_mapping, - .p_decode = (kxdrproc_t) xdr_decode_bool, - .p_bufsiz = 4, - .p_count = 1, - .p_statidx = PMAP_SET, - .p_name = "SET", - }, -[PMAP_UNSET] = { - .p_proc = PMAP_UNSET, - .p_encode = (kxdrproc_t) xdr_encode_mapping, - .p_decode = (kxdrproc_t) xdr_decode_bool, - .p_bufsiz = 4, - .p_count = 1, - .p_statidx = PMAP_UNSET, - .p_name = "UNSET", - }, -[PMAP_GETPORT] = { - .p_proc = PMAP_GETPORT, - .p_encode = (kxdrproc_t) xdr_encode_mapping, - .p_decode = (kxdrproc_t) xdr_decode_port, - .p_bufsiz = 4, - .p_count = 1, - .p_statidx = PMAP_GETPORT, - .p_name = "GETPORT", - }, -}; - -static struct rpc_version pmap_version2 = { - .number = 2, - .nrprocs = 4, - .procs = pmap_procedures -}; - -static struct rpc_version * pmap_version[] = { - NULL, - NULL, - &pmap_version2 -}; - -static struct rpc_stat pmap_stats; - -static struct rpc_program pmap_program = { - .name = "portmap", - .number = RPC_PMAP_PROGRAM, - .nrvers = ARRAY_SIZE(pmap_version), - .version = pmap_version, - .stats = &pmap_stats, -}; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c new file mode 100644 index 000000000000..6c7aa8a1f0c6 --- /dev/null +++ b/net/sunrpc/rpcb_clnt.c @@ -0,0 +1,625 @@ +/* + * In-kernel rpcbind client supporting versions 2, 3, and 4 of the rpcbind + * protocol + * + * Based on RFC 1833: "Binding Protocols for ONC RPC Version 2" and + * RFC 3530: "Network File System (NFS) version 4 Protocol" + * + * Original: Gilles Quillard, Bull Open Source, 2005 <gilles.quillard@bull.net> + * Updated: Chuck Lever, Oracle Corporation, 2007 <chuck.lever@oracle.com> + * + * Descended from net/sunrpc/pmap_clnt.c, + * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/kernel.h> +#include <linux/errno.h> + +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/sched.h> + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_BIND +#endif + +#define RPCBIND_PROGRAM (100000u) +#define RPCBIND_PORT (111u) + +enum { + RPCBPROC_NULL, + RPCBPROC_SET, + RPCBPROC_UNSET, + RPCBPROC_GETPORT, + RPCBPROC_GETADDR = 3, /* alias for GETPORT */ + RPCBPROC_DUMP, + RPCBPROC_CALLIT, + RPCBPROC_BCAST = 5, /* alias for CALLIT */ + RPCBPROC_GETTIME, + RPCBPROC_UADDR2TADDR, + RPCBPROC_TADDR2UADDR, + RPCBPROC_GETVERSADDR, + RPCBPROC_INDIRECT, + RPCBPROC_GETADDRLIST, + RPCBPROC_GETSTAT, +}; + +#define RPCB_HIGHPROC_2 RPCBPROC_CALLIT +#define RPCB_HIGHPROC_3 RPCBPROC_TADDR2UADDR +#define RPCB_HIGHPROC_4 RPCBPROC_GETSTAT + +/* + * r_addr + * + * Quoting RFC 3530, section 2.2: + * + * For TCP over IPv4 and for UDP over IPv4, the format of r_addr is the + * US-ASCII string: + * + * h1.h2.h3.h4.p1.p2 + * + * The prefix, "h1.h2.h3.h4", is the standard textual form for + * representing an IPv4 address, which is always four octets long. + * Assuming big-endian ordering, h1, h2, h3, and h4, are respectively, + * the first through fourth octets each converted to ASCII-decimal. + * Assuming big-endian ordering, p1 and p2 are, respectively, the first + * and second octets each converted to ASCII-decimal. For example, if a + * host, in big-endian order, has an address of 0x0A010307 and there is + * a service listening on, in big endian order, port 0x020F (decimal + * 527), then the complete universal address is "10.1.3.7.2.15". + * + * ... + * + * For TCP over IPv6 and for UDP over IPv6, the format of r_addr is the + * US-ASCII string: + * + * x1:x2:x3:x4:x5:x6:x7:x8.p1.p2 + * + * The suffix "p1.p2" is the service port, and is computed the same way + * as with universal addresses for TCP and UDP over IPv4. The prefix, + * "x1:x2:x3:x4:x5:x6:x7:x8", is the standard textual form for + * representing an IPv6 address as defined in Section 2.2 of [RFC2373]. + * Additionally, the two alternative forms specified in Section 2.2 of + * [RFC2373] are also acceptable. + * + * XXX: Currently this implementation does not explicitly convert the + * stored address to US-ASCII on non-ASCII systems. + */ +#define RPCB_MAXADDRLEN (128u) + +/* + * r_netid + * + * Quoting RFC 3530, section 2.2: + * + * For TCP over IPv4 the value of r_netid is the string "tcp". For UDP + * over IPv4 the value of r_netid is the string "udp". + * + * ... + * + * For TCP over IPv6 the value of r_netid is the string "tcp6". For UDP + * over IPv6 the value of r_netid is the string "udp6". + */ +#define RPCB_NETID_UDP "\165\144\160" /* "udp" */ +#define RPCB_NETID_TCP "\164\143\160" /* "tcp" */ +#define RPCB_NETID_UDP6 "\165\144\160\066" /* "udp6" */ +#define RPCB_NETID_TCP6 "\164\143\160\066" /* "tcp6" */ + +#define RPCB_MAXNETIDLEN (4u) + +/* + * r_owner + * + * The "owner" is allowed to unset a service in the rpcbind database. + * We always use the following (arbitrary) fixed string. + */ +#define RPCB_OWNER_STRING "rpcb" +#define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) + +static void rpcb_getport_done(struct rpc_task *, void *); +extern struct rpc_program rpcb_program; + +struct rpcbind_args { + struct rpc_xprt * r_xprt; + + u32 r_prog; + u32 r_vers; + u32 r_prot; + unsigned short r_port; + char * r_netid; + char r_addr[RPCB_MAXADDRLEN]; + char * r_owner; +}; + +static struct rpc_procinfo rpcb_procedures2[]; +static struct rpc_procinfo rpcb_procedures3[]; + +static struct rpcb_info { + int rpc_vers; + struct rpc_procinfo * rpc_proc; +} rpcb_next_version[]; + +static void rpcb_getport_prepare(struct rpc_task *task, void *calldata) +{ + struct rpcbind_args *map = calldata; + struct rpc_xprt *xprt = map->r_xprt; + struct rpc_message msg = { + .rpc_proc = rpcb_next_version[xprt->bind_index].rpc_proc, + .rpc_argp = map, + .rpc_resp = &map->r_port, + }; + + rpc_call_setup(task, &msg, 0); +} + +static void rpcb_map_release(void *data) +{ + struct rpcbind_args *map = data; + + xprt_put(map->r_xprt); + kfree(map); +} + +static const struct rpc_call_ops rpcb_getport_ops = { + .rpc_call_prepare = rpcb_getport_prepare, + .rpc_call_done = rpcb_getport_done, + .rpc_release = rpcb_map_release, +}; + +static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status) +{ + xprt_clear_binding(xprt); + rpc_wake_up_status(&xprt->binding, status); +} + +static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, + int proto, int version, int privileged) +{ + struct rpc_create_args args = { + .protocol = proto, + .address = srvaddr, + .addrsize = sizeof(struct sockaddr_in), + .servername = hostname, + .program = &rpcb_program, + .version = version, + .authflavor = RPC_AUTH_UNIX, + .flags = (RPC_CLNT_CREATE_ONESHOT | + RPC_CLNT_CREATE_NOPING), + }; + + ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); + if (!privileged) + args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; + return rpc_create(&args); +} + +/** + * rpcb_register - set or unset a port registration with the local rpcbind svc + * @prog: RPC program number to bind + * @vers: RPC version number to bind + * @prot: transport protocol to use to make this request + * @port: port value to register + * @okay: result code + * + * port == 0 means unregister, port != 0 means register. + * + * This routine supports only rpcbind version 2. + */ +int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) +{ + struct sockaddr_in sin = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + }; + struct rpcbind_args map = { + .r_prog = prog, + .r_vers = vers, + .r_prot = prot, + .r_port = port, + }; + struct rpc_message msg = { + .rpc_proc = &rpcb_procedures2[port ? + RPCBPROC_SET : RPCBPROC_UNSET], + .rpc_argp = &map, + .rpc_resp = okay, + }; + struct rpc_clnt *rpcb_clnt; + int error = 0; + + dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " + "rpcbind\n", (port ? "" : "un"), + prog, vers, prot, port); + + rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin, + IPPROTO_UDP, 2, 1); + if (IS_ERR(rpcb_clnt)) + return PTR_ERR(rpcb_clnt); + + error = rpc_call_sync(rpcb_clnt, &msg, 0); + + if (error < 0) + printk(KERN_WARNING "RPC: failed to contact local rpcbind " + "server (errno %d).\n", -error); + dprintk("RPC: registration status %d/%d\n", error, *okay); + + return error; +} + +#ifdef CONFIG_ROOT_NFS +/** + * rpcb_getport_external - obtain the port for an RPC service on a given host + * @sin: address of remote peer + * @prog: RPC program number to bind + * @vers: RPC version number to bind + * @prot: transport protocol to use to make this request + * + * Called from outside the RPC client in a synchronous task context. + * + * For now, this supports only version 2 queries, but is used only by + * mount_clnt for NFS_ROOT. + */ +int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog, + __u32 vers, int prot) +{ + struct rpcbind_args map = { + .r_prog = prog, + .r_vers = vers, + .r_prot = prot, + .r_port = 0, + }; + struct rpc_message msg = { + .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT], + .rpc_argp = &map, + .rpc_resp = &map.r_port, + }; + struct rpc_clnt *rpcb_clnt; + char hostname[40]; + int status; + + dprintk("RPC: rpcb_getport_external(%u.%u.%u.%u, %u, %u, %d)\n", + NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); + + sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); + rpcb_clnt = rpcb_create(hostname, (struct sockaddr *)sin, prot, 2, 0); + if (IS_ERR(rpcb_clnt)) + return PTR_ERR(rpcb_clnt); + + status = rpc_call_sync(rpcb_clnt, &msg, 0); + + if (status >= 0) { + if (map.r_port != 0) + return map.r_port; + status = -EACCES; + } + return status; +} +#endif + +/** + * rpcb_getport - obtain the port for a given RPC service on a given host + * @task: task that is waiting for portmapper request + * + * This one can be called for an ongoing RPC request, and can be used in + * an async (rpciod) context. + */ +void rpcb_getport(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + int bind_version; + struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_clnt *rpcb_clnt; + static struct rpcbind_args *map; + struct rpc_task *child; + struct sockaddr addr; + int status; + + dprintk("RPC: %5u rpcb_getport(%s, %u, %u, %d)\n", + task->tk_pid, clnt->cl_server, + clnt->cl_prog, clnt->cl_vers, xprt->prot); + + /* Autobind on cloned rpc clients is discouraged */ + BUG_ON(clnt->cl_parent != clnt); + + if (xprt_test_and_set_binding(xprt)) { + status = -EACCES; /* tell caller to check again */ + dprintk("RPC: %5u rpcb_getport waiting for another binder\n", + task->tk_pid); + goto bailout_nowake; + } + + /* Put self on queue before sending rpcbind request, in case + * rpcb_getport_done completes before we return from rpc_run_task */ + rpc_sleep_on(&xprt->binding, task, NULL, NULL); + + /* Someone else may have bound if we slept */ + if (xprt_bound(xprt)) { + status = 0; + dprintk("RPC: %5u rpcb_getport already bound\n", task->tk_pid); + goto bailout_nofree; + } + + if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) { + xprt->bind_index = 0; + status = -EACCES; /* tell caller to try again later */ + dprintk("RPC: %5u rpcb_getport no more getport versions " + "available\n", task->tk_pid); + goto bailout_nofree; + } + bind_version = rpcb_next_version[xprt->bind_index].rpc_vers; + + dprintk("RPC: %5u rpcb_getport trying rpcbind version %u\n", + task->tk_pid, bind_version); + + map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC); + if (!map) { + status = -ENOMEM; + dprintk("RPC: %5u rpcb_getport no memory available\n", + task->tk_pid); + goto bailout_nofree; + } + map->r_prog = clnt->cl_prog; + map->r_vers = clnt->cl_vers; + map->r_prot = xprt->prot; + map->r_port = 0; + map->r_xprt = xprt_get(xprt); + map->r_netid = (xprt->prot == IPPROTO_TCP) ? RPCB_NETID_TCP : + RPCB_NETID_UDP; + memcpy(&map->r_addr, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR), + sizeof(map->r_addr)); + map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ + + rpc_peeraddr(clnt, (void *)&addr, sizeof(addr)); + rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0); + if (IS_ERR(rpcb_clnt)) { + status = PTR_ERR(rpcb_clnt); + dprintk("RPC: %5u rpcb_getport rpcb_create failed, error %ld\n", + task->tk_pid, PTR_ERR(rpcb_clnt)); + goto bailout; + } + + child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map); + if (IS_ERR(child)) { + status = -EIO; + dprintk("RPC: %5u rpcb_getport rpc_run_task failed\n", + task->tk_pid); + goto bailout_nofree; + } + rpc_put_task(child); + + task->tk_xprt->stat.bind_count++; + return; + +bailout: + kfree(map); + xprt_put(xprt); +bailout_nofree: + rpcb_wake_rpcbind_waiters(xprt, status); +bailout_nowake: + task->tk_status = status; +} + +/* + * Rpcbind child task calls this callback via tk_exit. + */ +static void rpcb_getport_done(struct rpc_task *child, void *data) +{ + struct rpcbind_args *map = data; + struct rpc_xprt *xprt = map->r_xprt; + int status = child->tk_status; + + /* rpcbind server doesn't support this rpcbind protocol version */ + if (status == -EPROTONOSUPPORT) + xprt->bind_index++; + + if (status < 0) { + /* rpcbind server not available on remote host? */ + xprt->ops->set_port(xprt, 0); + } else if (map->r_port == 0) { + /* Requested RPC service wasn't registered on remote host */ + xprt->ops->set_port(xprt, 0); + status = -EACCES; + } else { + /* Succeeded */ + xprt->ops->set_port(xprt, map->r_port); + xprt_set_bound(xprt); + status = 0; + } + + dprintk("RPC: %5u rpcb_getport_done(status %d, port %u)\n", + child->tk_pid, status, map->r_port); + + rpcb_wake_rpcbind_waiters(xprt, status); +} + +static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, + struct rpcbind_args *rpcb) +{ + dprintk("RPC: rpcb_encode_mapping(%u, %u, %d, %u)\n", + rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); + *p++ = htonl(rpcb->r_prog); + *p++ = htonl(rpcb->r_vers); + *p++ = htonl(rpcb->r_prot); + *p++ = htonl(rpcb->r_port); + + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p, + unsigned short *portp) +{ + *portp = (unsigned short) ntohl(*p++); + dprintk("RPC: rpcb_decode_getport result %u\n", + *portp); + return 0; +} + +static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, + unsigned int *boolp) +{ + *boolp = (unsigned int) ntohl(*p++); + dprintk("RPC: rpcb_decode_set result %u\n", + *boolp); + return 0; +} + +static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p, + struct rpcbind_args *rpcb) +{ + dprintk("RPC: rpcb_encode_getaddr(%u, %u, %s)\n", + rpcb->r_prog, rpcb->r_vers, rpcb->r_addr); + *p++ = htonl(rpcb->r_prog); + *p++ = htonl(rpcb->r_vers); + + p = xdr_encode_string(p, rpcb->r_netid); + p = xdr_encode_string(p, rpcb->r_addr); + p = xdr_encode_string(p, rpcb->r_owner); + + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + return 0; +} + +static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, + unsigned short *portp) +{ + char *addr; + int addr_len, c, i, f, first, val; + + *portp = 0; + addr_len = (unsigned int) ntohl(*p++); + if (addr_len > RPCB_MAXADDRLEN) /* sanity */ + return -EINVAL; + + dprintk("RPC: rpcb_decode_getaddr returned string: '%s'\n", + (char *) p); + + addr = (char *)p; + val = 0; + first = 1; + f = 1; + for (i = addr_len - 1; i > 0; i--) { + c = addr[i]; + if (c >= '0' && c <= '9') { + val += (c - '0') * f; + f *= 10; + } else if (c == '.') { + if (first) { + *portp = val; + val = first = 0; + f = 1; + } else { + *portp |= (val << 8); + break; + } + } + } + + dprintk("RPC: rpcb_decode_getaddr port=%u\n", *portp); + return 0; +} + +#define RPCB_program_sz (1u) +#define RPCB_version_sz (1u) +#define RPCB_protocol_sz (1u) +#define RPCB_port_sz (1u) +#define RPCB_boolean_sz (1u) + +#define RPCB_netid_sz (1+XDR_QUADLEN(RPCB_MAXNETIDLEN)) +#define RPCB_addr_sz (1+XDR_QUADLEN(RPCB_MAXADDRLEN)) +#define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN)) + +#define RPCB_mappingargs_sz RPCB_program_sz+RPCB_version_sz+ \ + RPCB_protocol_sz+RPCB_port_sz +#define RPCB_getaddrargs_sz RPCB_program_sz+RPCB_version_sz+ \ + RPCB_netid_sz+RPCB_addr_sz+ \ + RPCB_ownerstring_sz + +#define RPCB_setres_sz RPCB_boolean_sz +#define RPCB_getportres_sz RPCB_port_sz + +/* + * Note that RFC 1833 does not put any size restrictions on the + * address string returned by the remote rpcbind database. + */ +#define RPCB_getaddrres_sz RPCB_addr_sz + +#define PROC(proc, argtype, restype) \ + [RPCBPROC_##proc] = { \ + .p_proc = RPCBPROC_##proc, \ + .p_encode = (kxdrproc_t) rpcb_encode_##argtype, \ + .p_decode = (kxdrproc_t) rpcb_decode_##restype, \ + .p_arglen = RPCB_##argtype##args_sz, \ + .p_replen = RPCB_##restype##res_sz, \ + .p_statidx = RPCBPROC_##proc, \ + .p_timer = 0, \ + .p_name = #proc, \ + } + +/* + * Not all rpcbind procedures described in RFC 1833 are implemented + * since the Linux kernel RPC code requires only these. + */ +static struct rpc_procinfo rpcb_procedures2[] = { + PROC(SET, mapping, set), + PROC(UNSET, mapping, set), + PROC(GETADDR, mapping, getport), +}; + +static struct rpc_procinfo rpcb_procedures3[] = { + PROC(SET, mapping, set), + PROC(UNSET, mapping, set), + PROC(GETADDR, getaddr, getaddr), +}; + +static struct rpc_procinfo rpcb_procedures4[] = { + PROC(SET, mapping, set), + PROC(UNSET, mapping, set), + PROC(GETVERSADDR, getaddr, getaddr), +}; + +static struct rpcb_info rpcb_next_version[] = { +#ifdef CONFIG_SUNRPC_BIND34 + { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, + { 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, +#endif + { 2, &rpcb_procedures2[RPCBPROC_GETPORT] }, + { 0, NULL }, +}; + +static struct rpc_version rpcb_version2 = { + .number = 2, + .nrprocs = RPCB_HIGHPROC_2, + .procs = rpcb_procedures2 +}; + +static struct rpc_version rpcb_version3 = { + .number = 3, + .nrprocs = RPCB_HIGHPROC_3, + .procs = rpcb_procedures3 +}; + +static struct rpc_version rpcb_version4 = { + .number = 4, + .nrprocs = RPCB_HIGHPROC_4, + .procs = rpcb_procedures4 +}; + +static struct rpc_version *rpcb_version[] = { + NULL, + NULL, + &rpcb_version2, + &rpcb_version3, + &rpcb_version4 +}; + +static struct rpc_stat rpcb_stats; + +struct rpc_program rpcb_program = { + .name = "rpcbind", + .number = RPCBIND_PROGRAM, + .nrvers = ARRAY_SIZE(rpcb_version), + .version = rpcb_version, + .stats = &rpcb_stats, +}; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 6d87320074b1..4a53e94f8134 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -741,50 +741,53 @@ static void rpc_async_schedule(struct work_struct *work) * @task: RPC task that will use this buffer * @size: requested byte size * - * We try to ensure that some NFS reads and writes can always proceed - * by using a mempool when allocating 'small' buffers. + * To prevent rpciod from hanging, this allocator never sleeps, + * returning NULL if the request cannot be serviced immediately. + * The caller can arrange to sleep in a way that is safe for rpciod. + * + * Most requests are 'small' (under 2KiB) and can be serviced from a + * mempool, ensuring that NFS reads and writes can always proceed, + * and that there is good locality of reference for these buffers. + * * In order to avoid memory starvation triggering more writebacks of - * NFS requests, we use GFP_NOFS rather than GFP_KERNEL. + * NFS requests, we avoid using GFP_KERNEL. */ -void * rpc_malloc(struct rpc_task *task, size_t size) +void *rpc_malloc(struct rpc_task *task, size_t size) { - struct rpc_rqst *req = task->tk_rqstp; - gfp_t gfp; + size_t *buf; + gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT; - if (task->tk_flags & RPC_TASK_SWAPPER) - gfp = GFP_ATOMIC; + size += sizeof(size_t); + if (size <= RPC_BUFFER_MAXSIZE) + buf = mempool_alloc(rpc_buffer_mempool, gfp); else - gfp = GFP_NOFS; - - if (size > RPC_BUFFER_MAXSIZE) { - req->rq_buffer = kmalloc(size, gfp); - if (req->rq_buffer) - req->rq_bufsize = size; - } else { - req->rq_buffer = mempool_alloc(rpc_buffer_mempool, gfp); - if (req->rq_buffer) - req->rq_bufsize = RPC_BUFFER_MAXSIZE; - } - return req->rq_buffer; + buf = kmalloc(size, gfp); + *buf = size; + dprintk("RPC: %5u allocated buffer of size %u at %p\n", + task->tk_pid, size, buf); + return (void *) ++buf; } /** * rpc_free - free buffer allocated via rpc_malloc - * @task: RPC task with a buffer to be freed + * @buffer: buffer to free * */ -void rpc_free(struct rpc_task *task) +void rpc_free(void *buffer) { - struct rpc_rqst *req = task->tk_rqstp; + size_t size, *buf = (size_t *) buffer; - if (req->rq_buffer) { - if (req->rq_bufsize == RPC_BUFFER_MAXSIZE) - mempool_free(req->rq_buffer, rpc_buffer_mempool); - else - kfree(req->rq_buffer); - req->rq_buffer = NULL; - req->rq_bufsize = 0; - } + if (!buffer) + return; + size = *buf; + buf--; + + dprintk("RPC: freeing buffer of size %u at %p\n", + size, buf); + if (size <= RPC_BUFFER_MAXSIZE) + mempool_free(buf, rpc_buffer_mempool); + else + kfree(buf); } /* diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b4db53ff1435..b7503c103ae8 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -757,7 +757,7 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port) if (progp->pg_vers[i]->vs_hidden) continue; - error = rpc_register(progp->pg_prog, i, proto, port, &dummy); + error = rpcb_register(progp->pg_prog, i, proto, port, &dummy); if (error < 0) break; if (port && !dummy) { diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 456a14510308..5b05b73e4c1d 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -823,7 +823,6 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) req->rq_task = task; req->rq_xprt = xprt; req->rq_buffer = NULL; - req->rq_bufsize = 0; req->rq_xid = xprt_alloc_xid(xprt); req->rq_release_snd_buf = NULL; xprt_reset_majortimeo(req); @@ -855,7 +854,7 @@ void xprt_release(struct rpc_task *task) mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout); spin_unlock_bh(&xprt->transport_lock); - xprt->ops->buf_free(task); + xprt->ops->buf_free(req->rq_buffer); task->tk_rqstp = NULL; if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); @@ -928,6 +927,7 @@ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t si xprt->timer.data = (unsigned long) xprt; xprt->last_used = jiffies; xprt->cwnd = RPC_INITCWND; + xprt->bind_index = 0; rpc_init_wait_queue(&xprt->binding, "xprt_binding"); rpc_init_wait_queue(&xprt->pending, "xprt_pending"); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a5a32029e728..cc33c5880abb 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1476,7 +1476,7 @@ static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, - .rpcbind = rpc_getport, + .rpcbind = rpcb_getport, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, @@ -1493,7 +1493,7 @@ static struct rpc_xprt_ops xs_udp_ops = { static struct rpc_xprt_ops xs_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xs_tcp_release_xprt, - .rpcbind = rpc_getport, + .rpcbind = rpcb_getport, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, |