diff options
Diffstat (limited to 'net')
34 files changed, 751 insertions, 433 deletions
diff --git a/net/core/flow.c b/net/core/flow.c index a77531c139b7..19991175fdeb 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -23,7 +23,6 @@ #include <linux/mutex.h> #include <net/flow.h> #include <asm/atomic.h> -#include <asm/semaphore.h> #include <linux/security.h> struct flow_cache_entry { diff --git a/net/core/sock.c b/net/core/sock.c index 54c836a2216b..5dbb81bc9673 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -450,15 +450,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname, * Options without arguments */ -#ifdef SO_DONTLINGER /* Compatibility item... */ - if (optname == SO_DONTLINGER) { - lock_sock(sk); - sock_reset_flag(sk, SOCK_LINGER); - release_sock(sk); - return 0; - } -#endif - if (optname == SO_BINDTODEVICE) return sock_bindtodevice(sk, optval, optlen); @@ -942,7 +933,6 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) * @family: protocol family * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) * @prot: struct proto associated with this new sock instance - * @zero_it: if we should zero the newly allocated sock */ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index a4c1b36f4bc7..9dfe2470962c 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -27,7 +27,6 @@ #include <net/xfrm.h> #include <asm/ioctls.h> -#include <asm/semaphore.h> #include <linux/spinlock.h> #include <linux/timer.h> #include <linux/delay.h> diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index fb53ddfea5b5..a75807b971b3 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -14,7 +14,6 @@ * - Adaptive compression. */ #include <linux/module.h> -#include <asm/semaphore.h> #include <linux/crypto.h> #include <linux/err.h> #include <linux/pfkeyv2.h> diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index e3dcfa2f436b..ee6de425ce6b 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -34,7 +34,6 @@ #include <net/ip.h> #include <net/xfrm.h> #include <net/ipcomp.h> -#include <asm/semaphore.h> #include <linux/crypto.h> #include <linux/err.h> #include <linux/pfkeyv2.h> diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 742003d3a841..9ee3affab346 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -13,6 +13,7 @@ #include <linux/types.h> #include <linux/jhash.h> +#include <asm/unaligned.h> #include "ieee80211_i.h" diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 02de8f1522a3..3df809222d1c 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -7,7 +7,6 @@ * published by the Free Software Foundation. */ -#include <asm/unaligned.h> #include "mesh.h" #define TEST_FRAME_LEN 8192 diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 6d38a81b336d..ba3f6e49fddc 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -493,8 +493,8 @@ static int rxkad_verify_packet(const struct rxrpc_call *call, __be32 x[2]; } tmpbuf __attribute__((aligned(8))); /* must all be in same page */ __be32 x; - u16 y; __be16 cksum; + u32 y; int ret; sp = rxrpc_skb(skb); diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 92e1dbe50947..5369aa369b35 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ - auth.o auth_null.o auth_unix.o \ + auth.o auth_null.o auth_unix.o auth_generic.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index eca941ce298b..6bfea9ed6869 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/errno.h> +#include <linux/hash.h> #include <linux/sunrpc/clnt.h> #include <linux/spinlock.h> @@ -219,6 +220,9 @@ rpcauth_destroy_credcache(struct rpc_auth *auth) } EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache); + +#define RPC_AUTH_EXPIRY_MORATORIUM (60 * HZ) + /* * Remove stale credentials. Avoid sleeping inside the loop. */ @@ -227,6 +231,7 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) { spinlock_t *cache_lock; struct rpc_cred *cred; + unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM; while (!list_empty(&cred_unused)) { cred = list_entry(cred_unused.next, struct rpc_cred, cr_lru); @@ -234,6 +239,10 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) number_cred_unused--; if (atomic_read(&cred->cr_count) != 0) continue; + /* Enforce a 5 second garbage collection moratorium */ + if (time_in_range(cred->cr_expire, expired, jiffies) && + test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) + continue; cache_lock = &cred->cr_auth->au_credcache->lock; spin_lock(cache_lock); if (atomic_read(&cred->cr_count) == 0) { @@ -280,10 +289,9 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, struct hlist_node *pos; struct rpc_cred *cred = NULL, *entry, *new; - int nr = 0; + unsigned int nr; - if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) - nr = acred->uid & RPC_CREDCACHE_MASK; + nr = hash_long(acred->uid, RPC_CREDCACHE_HASHBITS); rcu_read_lock(); hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) { @@ -356,7 +364,6 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) put_group_info(acred.group_info); return ret; } -EXPORT_SYMBOL_GPL(rpcauth_lookupcred); void rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, @@ -375,41 +382,58 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, } EXPORT_SYMBOL_GPL(rpcauth_init_cred); -struct rpc_cred * -rpcauth_bindcred(struct rpc_task *task) +void +rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred) +{ + task->tk_msg.rpc_cred = get_rpccred(cred); + dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, + cred->cr_auth->au_ops->au_name, cred); +} +EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred); + +static void +rpcauth_bind_root_cred(struct rpc_task *task) { struct rpc_auth *auth = task->tk_client->cl_auth; struct auth_cred acred = { - .uid = current->fsuid, - .gid = current->fsgid, - .group_info = current->group_info, + .uid = 0, + .gid = 0, }; struct rpc_cred *ret; - int flags = 0; dprintk("RPC: %5u looking up %s cred\n", task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); - get_group_info(acred.group_info); - if (task->tk_flags & RPC_TASK_ROOTCREDS) - flags |= RPCAUTH_LOOKUP_ROOTCREDS; - ret = auth->au_ops->lookup_cred(auth, &acred, flags); + ret = auth->au_ops->lookup_cred(auth, &acred, 0); + if (!IS_ERR(ret)) + task->tk_msg.rpc_cred = ret; + else + task->tk_status = PTR_ERR(ret); +} + +static void +rpcauth_bind_new_cred(struct rpc_task *task) +{ + struct rpc_auth *auth = task->tk_client->cl_auth; + struct rpc_cred *ret; + + dprintk("RPC: %5u looking up %s cred\n", + task->tk_pid, auth->au_ops->au_name); + ret = rpcauth_lookupcred(auth, 0); if (!IS_ERR(ret)) task->tk_msg.rpc_cred = ret; else task->tk_status = PTR_ERR(ret); - put_group_info(acred.group_info); - return ret; } void -rpcauth_holdcred(struct rpc_task *task) +rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; - if (cred != NULL) { - get_rpccred(cred); - dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, - cred->cr_auth->au_ops->au_name, cred); - } + if (cred != NULL) + cred->cr_ops->crbind(task, cred); + else if (flags & RPC_TASK_ROOTCREDS) + rpcauth_bind_root_cred(task); + else + rpcauth_bind_new_cred(task); } void @@ -550,6 +574,7 @@ static struct shrinker rpc_cred_shrinker = { void __init rpcauth_init_module(void) { rpc_init_authunix(); + rpc_init_generic_auth(); register_shrinker(&rpc_cred_shrinker); } diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c new file mode 100644 index 000000000000..d927d9f57412 --- /dev/null +++ b/net/sunrpc/auth_generic.c @@ -0,0 +1,177 @@ +/* + * Generic RPC credential + * + * Copyright (C) 2008, Trond Myklebust <Trond.Myklebust@netapp.com> + */ + +#include <linux/err.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/sunrpc/auth.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/debug.h> +#include <linux/sunrpc/sched.h> + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +#define RPC_ANONYMOUS_USERID ((uid_t)-2) +#define RPC_ANONYMOUS_GROUPID ((gid_t)-2) + +struct generic_cred { + struct rpc_cred gc_base; + struct auth_cred acred; +}; + +static struct rpc_auth generic_auth; +static struct rpc_cred_cache generic_cred_cache; +static const struct rpc_credops generic_credops; + +/* + * Public call interface + */ +struct rpc_cred *rpc_lookup_cred(void) +{ + return rpcauth_lookupcred(&generic_auth, 0); +} +EXPORT_SYMBOL_GPL(rpc_lookup_cred); + +/* + * Public call interface for looking up machine creds. + */ +struct rpc_cred *rpc_lookup_machine_cred(void) +{ + struct auth_cred acred = { + .uid = RPC_ANONYMOUS_USERID, + .gid = RPC_ANONYMOUS_GROUPID, + .machine_cred = 1, + }; + + dprintk("RPC: looking up machine cred\n"); + return generic_auth.au_ops->lookup_cred(&generic_auth, &acred, 0); +} +EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred); + +static void +generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred) +{ + struct rpc_auth *auth = task->tk_client->cl_auth; + struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred; + struct rpc_cred *ret; + + ret = auth->au_ops->lookup_cred(auth, acred, 0); + if (!IS_ERR(ret)) + task->tk_msg.rpc_cred = ret; + else + task->tk_status = PTR_ERR(ret); +} + +/* + * Lookup generic creds for current process + */ +static struct rpc_cred * +generic_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) +{ + return rpcauth_lookup_credcache(&generic_auth, acred, flags); +} + +static struct rpc_cred * +generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) +{ + struct generic_cred *gcred; + + gcred = kmalloc(sizeof(*gcred), GFP_KERNEL); + if (gcred == NULL) + return ERR_PTR(-ENOMEM); + + rpcauth_init_cred(&gcred->gc_base, acred, &generic_auth, &generic_credops); + gcred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; + + gcred->acred.uid = acred->uid; + gcred->acred.gid = acred->gid; + gcred->acred.group_info = acred->group_info; + if (gcred->acred.group_info != NULL) + get_group_info(gcred->acred.group_info); + gcred->acred.machine_cred = acred->machine_cred; + + dprintk("RPC: allocated %s cred %p for uid %d gid %d\n", + gcred->acred.machine_cred ? "machine" : "generic", + gcred, acred->uid, acred->gid); + return &gcred->gc_base; +} + +static void +generic_free_cred(struct rpc_cred *cred) +{ + struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base); + + dprintk("RPC: generic_free_cred %p\n", gcred); + if (gcred->acred.group_info != NULL) + put_group_info(gcred->acred.group_info); + kfree(gcred); +} + +static void +generic_free_cred_callback(struct rcu_head *head) +{ + struct rpc_cred *cred = container_of(head, struct rpc_cred, cr_rcu); + generic_free_cred(cred); +} + +static void +generic_destroy_cred(struct rpc_cred *cred) +{ + call_rcu(&cred->cr_rcu, generic_free_cred_callback); +} + +/* + * Match credentials against current process creds. + */ +static int +generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) +{ + struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base); + + if (gcred->acred.uid != acred->uid || + gcred->acred.gid != acred->gid || + gcred->acred.group_info != acred->group_info || + gcred->acred.machine_cred != acred->machine_cred) + return 0; + return 1; +} + +void __init rpc_init_generic_auth(void) +{ + spin_lock_init(&generic_cred_cache.lock); +} + +void __exit rpc_destroy_generic_auth(void) +{ + rpcauth_clear_credcache(&generic_cred_cache); +} + +static struct rpc_cred_cache generic_cred_cache = { + {{ NULL, },}, +}; + +static const struct rpc_authops generic_auth_ops = { + .owner = THIS_MODULE, + .au_name = "Generic", + .lookup_cred = generic_lookup_cred, + .crcreate = generic_create_cred, +}; + +static struct rpc_auth generic_auth = { + .au_ops = &generic_auth_ops, + .au_count = ATOMIC_INIT(0), + .au_credcache = &generic_cred_cache, +}; + +static const struct rpc_credops generic_credops = { + .cr_name = "Generic cred", + .crdestroy = generic_destroy_cred, + .crbind = generic_bind_cred, + .crmatch = generic_match, +}; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 5828e5c060ca..cc12d5f5d5da 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -114,27 +114,14 @@ static void gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); - struct gss_cl_ctx *old; - old = gss_cred->gc_ctx; + if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)) + return; + gss_get_ctx(ctx); rcu_assign_pointer(gss_cred->gc_ctx, ctx); set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + smp_mb__before_clear_bit(); clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags); - if (old) - gss_put_ctx(old); -} - -static int -gss_cred_is_uptodate_ctx(struct rpc_cred *cred) -{ - struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); - int res = 0; - - rcu_read_lock(); - if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) && gss_cred->gc_ctx) - res = 1; - rcu_read_unlock(); - return res; } static const void * @@ -266,6 +253,7 @@ gss_release_msg(struct gss_upcall_msg *gss_msg) BUG_ON(!list_empty(&gss_msg->list)); if (gss_msg->ctx != NULL) gss_put_ctx(gss_msg->ctx); + rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue); kfree(gss_msg); } @@ -339,7 +327,7 @@ gss_upcall_callback(struct rpc_task *task) spin_lock(&inode->i_lock); if (gss_msg->ctx) - gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_get_ctx(gss_msg->ctx)); + gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx); else task->tk_status = gss_msg->msg.errno; gss_cred->gc_upcall = NULL; @@ -370,9 +358,16 @@ gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid) static struct gss_upcall_msg * gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cred *cred) { + struct gss_cred *gss_cred = container_of(cred, + struct gss_cred, gc_base); struct gss_upcall_msg *gss_new, *gss_msg; + uid_t uid = cred->cr_uid; - gss_new = gss_alloc_msg(gss_auth, cred->cr_uid); + /* Special case: rpc.gssd assumes that uid == 0 implies machine creds */ + if (gss_cred->gc_machine_cred != 0) + uid = 0; + + gss_new = gss_alloc_msg(gss_auth, uid); if (gss_new == NULL) return ERR_PTR(-ENOMEM); gss_msg = gss_add_msg(gss_auth, gss_new); @@ -408,13 +403,17 @@ gss_refresh_upcall(struct rpc_task *task) } spin_lock(&inode->i_lock); if (gss_cred->gc_upcall != NULL) - rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL, NULL); - else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { + rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL); + else if (gss_msg->ctx != NULL) { + gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx); + gss_cred->gc_upcall = NULL; + rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); + } else if (gss_msg->msg.errno >= 0) { task->tk_timeout = 0; gss_cred->gc_upcall = gss_msg; /* gss_upcall_callback will release the reference to gss_upcall_msg */ atomic_inc(&gss_msg->count); - rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback, NULL); + rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback); } else err = gss_msg->msg.errno; spin_unlock(&inode->i_lock); @@ -454,7 +453,7 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) schedule(); } if (gss_msg->ctx) - gss_cred_set_ctx(cred, gss_get_ctx(gss_msg->ctx)); + gss_cred_set_ctx(cred, gss_msg->ctx); else err = gss_msg->msg.errno; spin_unlock(&inode->i_lock); @@ -709,7 +708,7 @@ gss_destroying_context(struct rpc_cred *cred) struct rpc_task *task; if (gss_cred->gc_ctx == NULL || - gss_cred->gc_ctx->gc_proc == RPC_GSS_PROC_DESTROY) + test_and_clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) return 0; gss_cred->gc_ctx->gc_proc = RPC_GSS_PROC_DESTROY; @@ -719,7 +718,7 @@ gss_destroying_context(struct rpc_cred *cred) * by the RPC call or by the put_rpccred() below */ get_rpccred(cred); - task = rpc_call_null(gss_auth->client, cred, RPC_TASK_ASYNC); + task = rpc_call_null(gss_auth->client, cred, RPC_TASK_ASYNC|RPC_TASK_SOFT); if (!IS_ERR(task)) rpc_put_task(task); @@ -817,6 +816,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) */ cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW; cred->gc_service = gss_auth->service; + cred->gc_machine_cred = acred->machine_cred; kref_get(&gss_auth->kref); return &cred->gc_base; @@ -843,17 +843,16 @@ gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags) { struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); - /* - * If the searchflags have set RPCAUTH_LOOKUP_NEW, then - * we don't really care if the credential has expired or not, - * since the caller should be prepared to reinitialise it. - */ - if ((flags & RPCAUTH_LOOKUP_NEW) && test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags)) + if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags)) goto out; /* Don't match with creds that have expired. */ - if (gss_cred->gc_ctx && time_after(jiffies, gss_cred->gc_ctx->gc_expiry)) + if (time_after(jiffies, gss_cred->gc_ctx->gc_expiry)) + return 0; + if (!test_bit(RPCAUTH_CRED_UPTODATE, &rc->cr_flags)) return 0; out: + if (acred->machine_cred != gss_cred->gc_machine_cred) + return 0; return (rc->cr_uid == acred->uid); } @@ -917,16 +916,48 @@ out_put_ctx: return NULL; } +static int gss_renew_cred(struct rpc_task *task) +{ + struct rpc_cred *oldcred = task->tk_msg.rpc_cred; + struct gss_cred *gss_cred = container_of(oldcred, + struct gss_cred, + gc_base); + struct rpc_auth *auth = oldcred->cr_auth; + struct auth_cred acred = { + .uid = oldcred->cr_uid, + .machine_cred = gss_cred->gc_machine_cred, + }; + struct rpc_cred *new; + + new = gss_lookup_cred(auth, &acred, RPCAUTH_LOOKUP_NEW); + if (IS_ERR(new)) + return PTR_ERR(new); + task->tk_msg.rpc_cred = new; + put_rpccred(oldcred); + return 0; +} + /* * Refresh credentials. XXX - finish */ static int gss_refresh(struct rpc_task *task) { + struct rpc_cred *cred = task->tk_msg.rpc_cred; + int ret = 0; + + if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) && + !test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) { + ret = gss_renew_cred(task); + if (ret < 0) + goto out; + cred = task->tk_msg.rpc_cred; + } - if (!gss_cred_is_uptodate_ctx(task->tk_msg.rpc_cred)) - return gss_refresh_upcall(task); - return 0; + if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)) + ret = gss_refresh_upcall(task); +out: + return ret; } /* Dummy refresh routine: used only when destroying the context */ @@ -1286,9 +1317,7 @@ out: static const struct rpc_authops authgss_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_GSS, -#ifdef RPC_DEBUG .au_name = "RPCSEC_GSS", -#endif .create = gss_create, .destroy = gss_destroy, .lookup_cred = gss_lookup_cred, @@ -1299,6 +1328,7 @@ static const struct rpc_credops gss_credops = { .cr_name = "AUTH_GSS", .crdestroy = gss_destroy_cred, .cr_init = gss_cred_init, + .crbind = rpcauth_generic_bind_cred, .crmatch = gss_match, .crmarshal = gss_marshal, .crrefresh = gss_refresh, @@ -1310,6 +1340,7 @@ static const struct rpc_credops gss_credops = { static const struct rpc_credops gss_nullops = { .cr_name = "AUTH_GSS", .crdestroy = gss_destroy_cred, + .crbind = rpcauth_generic_bind_cred, .crmatch = gss_match, .crmarshal = gss_marshal, .crrefresh = gss_refresh_null, diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c index ea8c92ecdae5..d83b881685fe 100644 --- a/net/sunrpc/auth_gss/gss_generic_token.c +++ b/net/sunrpc/auth_gss/gss_generic_token.c @@ -148,7 +148,7 @@ int g_token_size(struct xdr_netobj *mech, unsigned int body_size) { /* set body_size to sequence contents size */ - body_size += 4 + (int) mech->len; /* NEED overflow check */ + body_size += 2 + (int) mech->len; /* NEED overflow check */ return(1 + der_length_size(body_size) + body_size); } @@ -161,7 +161,7 @@ void g_make_token_header(struct xdr_netobj *mech, int body_size, unsigned char **buf) { *(*buf)++ = 0x60; - der_write_length(buf, 4 + mech->len + body_size); + der_write_length(buf, 2 + mech->len + body_size); *(*buf)++ = 0x06; *(*buf)++ = (unsigned char) mech->len; TWRITE_STR(*buf, mech->data, ((int) mech->len)); diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 0dd792338fa9..1d52308ca324 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -66,8 +66,8 @@ krb5_encrypt( goto out; if (crypto_blkcipher_ivsize(tfm) > 16) { - dprintk("RPC: gss_k5encrypt: tfm iv size to large %d\n", - crypto_blkcipher_ivsize(tfm)); + dprintk("RPC: gss_k5encrypt: tfm iv size too large %d\n", + crypto_blkcipher_ivsize(tfm)); goto out; } @@ -102,7 +102,7 @@ krb5_decrypt( goto out; if (crypto_blkcipher_ivsize(tfm) > 16) { - dprintk("RPC: gss_k5decrypt: tfm iv size to large %d\n", + dprintk("RPC: gss_k5decrypt: tfm iv size too large %d\n", crypto_blkcipher_ivsize(tfm)); goto out; } diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index dedcbd6108f4..5f1d36dfbcf7 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -87,10 +87,10 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, now = get_seconds(); - token->len = g_token_size(&ctx->mech_used, 22); + token->len = g_token_size(&ctx->mech_used, 24); ptr = token->data; - g_make_token_header(&ctx->mech_used, 22, &ptr); + g_make_token_header(&ctx->mech_used, 24, &ptr); *ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff); *ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff); @@ -109,15 +109,14 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, md5cksum.data, md5cksum.len)) return GSS_S_FAILURE; - memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, - KRB5_CKSUM_LENGTH); + memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8); spin_lock(&krb5_seq_lock); seq_send = ctx->seq_send++; spin_unlock(&krb5_seq_lock); if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff, - ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8)) + seq_send, krb5_hdr + 16, krb5_hdr + 8)) return GSS_S_FAILURE; return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index 43f3421f1e6a..f160be6c1a46 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -43,7 +43,7 @@ s32 krb5_make_seq_num(struct crypto_blkcipher *key, int direction, - s32 seqnum, + u32 seqnum, unsigned char *cksum, unsigned char *buf) { unsigned char plain[8]; @@ -65,7 +65,7 @@ s32 krb5_get_seq_num(struct crypto_blkcipher *key, unsigned char *cksum, unsigned char *buf, - int *direction, s32 * seqnum) + int *direction, u32 *seqnum) { s32 code; unsigned char plain[8]; diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index e30a993466bc..d91a5d004803 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -82,7 +82,7 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata}; s32 now; int direction; - s32 seqnum; + u32 seqnum; unsigned char *ptr = (unsigned char *)read_token->data; int bodysize; diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 3bdc527ee64a..b00b1b426301 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -137,7 +137,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, BUG_ON((buf->len - offset) % blocksize); plainlen = blocksize + buf->len - offset; - headlen = g_token_size(&kctx->mech_used, 22 + plainlen) - + headlen = g_token_size(&kctx->mech_used, 24 + plainlen) - (buf->len - offset); ptr = buf->head[0].iov_base + offset; @@ -149,7 +149,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, buf->len += headlen; BUG_ON((buf->len - offset - headlen) % blocksize); - g_make_token_header(&kctx->mech_used, 22 + plainlen, &ptr); + g_make_token_header(&kctx->mech_used, 24 + plainlen, &ptr); *ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff); @@ -176,9 +176,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, md5cksum.data, md5cksum.len)) return GSS_S_FAILURE; - memcpy(krb5_hdr + 16, - md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, - KRB5_CKSUM_LENGTH); + memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - 8, 8); spin_lock(&krb5_seq_lock); seq_send = kctx->seq_send++; diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c index abf17ce2e3b1..c832712f8d55 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c @@ -107,10 +107,10 @@ spkm3_make_token(struct spkm3_ctx *ctx, tokenlen = 10 + ctxelen + 1 + md5elen + 1; /* Create token header using generic routines */ - token->len = g_token_size(&ctx->mech_used, tokenlen); + token->len = g_token_size(&ctx->mech_used, tokenlen + 2); ptr = token->data; - g_make_token_header(&ctx->mech_used, tokenlen, &ptr); + g_make_token_header(&ctx->mech_used, tokenlen + 2, &ptr); spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit); } else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 481f984e9a22..5905d56737d6 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1146,7 +1146,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_SVC_INTEGRITY: if (unwrap_integ_data(&rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) - goto auth_err; + goto garbage_args; /* placeholders for length and seq. number: */ svc_putnl(resv, 0); svc_putnl(resv, 0); @@ -1154,7 +1154,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_SVC_PRIVACY: if (unwrap_priv_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) - goto auth_err; + goto garbage_args; /* placeholders for length and seq. number: */ svc_putnl(resv, 0); svc_putnl(resv, 0); @@ -1169,6 +1169,11 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) ret = SVC_OK; goto out; } +garbage_args: + /* Restore write pointer to its original value: */ + xdr_ressize_check(rqstp, reject_stat); + ret = SVC_GARBAGE; + goto out; auth_err: /* Restore write pointer to its original value: */ xdr_ressize_check(rqstp, reject_stat); diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 537d0e8589dd..c70dd7f5258e 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -104,9 +104,7 @@ nul_validate(struct rpc_task *task, __be32 *p) const struct rpc_authops authnull_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_NULL, -#ifdef RPC_DEBUG .au_name = "NULL", -#endif .create = nul_create, .destroy = nul_destroy, .lookup_cred = nul_lookup_cred, @@ -125,6 +123,7 @@ static const struct rpc_credops null_credops = { .cr_name = "AUTH_NULL", .crdestroy = nul_destroy_cred, + .crbind = rpcauth_generic_bind_cred, .crmatch = nul_match, .crmarshal = nul_marshal, .crrefresh = nul_refresh, diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 5ed91e5bcee4..44920b90bdc4 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -60,7 +60,8 @@ static struct rpc_cred * unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { struct unx_cred *cred; - int i; + unsigned int groups = 0; + unsigned int i; dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", acred->uid, acred->gid); @@ -70,21 +71,17 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; - if (flags & RPCAUTH_LOOKUP_ROOTCREDS) { - cred->uc_uid = 0; - cred->uc_gid = 0; - cred->uc_gids[0] = NOGROUP; - } else { - int groups = acred->group_info->ngroups; - if (groups > NFS_NGROUPS) - groups = NFS_NGROUPS; - - cred->uc_gid = acred->gid; - for (i = 0; i < groups; i++) - cred->uc_gids[i] = GROUP_AT(acred->group_info, i); - if (i < NFS_NGROUPS) - cred->uc_gids[i] = NOGROUP; - } + + if (acred->group_info != NULL) + groups = acred->group_info->ngroups; + if (groups > NFS_NGROUPS) + groups = NFS_NGROUPS; + + cred->uc_gid = acred->gid; + for (i = 0; i < groups; i++) + cred->uc_gids[i] = GROUP_AT(acred->group_info, i); + if (i < NFS_NGROUPS) + cred->uc_gids[i] = NOGROUP; return &cred->uc_base; } @@ -118,26 +115,21 @@ static int unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) { struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base); - int i; + unsigned int groups = 0; + unsigned int i; - if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) { - int groups; - if (cred->uc_uid != acred->uid - || cred->uc_gid != acred->gid) - return 0; + if (cred->uc_uid != acred->uid || cred->uc_gid != acred->gid) + return 0; + if (acred->group_info != NULL) groups = acred->group_info->ngroups; - if (groups > NFS_NGROUPS) - groups = NFS_NGROUPS; - for (i = 0; i < groups ; i++) - if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i)) - return 0; - return 1; - } - return (cred->uc_uid == 0 - && cred->uc_gid == 0 - && cred->uc_gids[0] == (gid_t) NOGROUP); + if (groups > NFS_NGROUPS) + groups = NFS_NGROUPS; + for (i = 0; i < groups ; i++) + if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i)) + return 0; + return 1; } /* @@ -218,9 +210,7 @@ void __init rpc_init_authunix(void) const struct rpc_authops authunix_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_UNIX, -#ifdef RPC_DEBUG .au_name = "UNIX", -#endif .create = unx_create, .destroy = unx_destroy, .lookup_cred = unx_lookup_cred, @@ -245,6 +235,7 @@ static const struct rpc_credops unix_credops = { .cr_name = "AUTH_UNIX", .crdestroy = unx_destroy_cred, + .crbind = rpcauth_generic_bind_cred, .crmatch = unx_match, .crmarshal = unx_marshal, .crrefresh = unx_refresh, diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index b5f2786251b9..d75530ff2a6d 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -571,7 +571,6 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) return -ETIMEDOUT; dreq->item = item; - dreq->recv_time = get_seconds(); spin_lock(&cache_defer_lock); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7b96ff38002f..8945307556ec 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -544,7 +544,7 @@ EXPORT_SYMBOL_GPL(rpc_run_task); * @msg: RPC call parameters * @flags: RPC call flags */ -int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) +int rpc_call_sync(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags) { struct rpc_task *task; struct rpc_task_setup task_setup_data = { @@ -575,7 +575,7 @@ EXPORT_SYMBOL_GPL(rpc_call_sync); * @data: user call data */ int -rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, +rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags, const struct rpc_call_ops *tk_ops, void *data) { struct rpc_task *task; @@ -1062,7 +1062,7 @@ call_transmit(struct rpc_task *task) if (task->tk_msg.rpc_proc->p_decode != NULL) return; task->tk_action = rpc_exit_task; - rpc_wake_up_task(task); + rpc_wake_up_queued_task(&task->tk_xprt->pending, task); } /* @@ -1116,7 +1116,8 @@ call_status(struct rpc_task *task) case -ETIMEDOUT: task->tk_action = call_timeout; if (task->tk_client->cl_discrtry) - xprt_force_disconnect(task->tk_xprt); + xprt_conditional_disconnect(task->tk_xprt, + req->rq_connect_cookie); break; case -ECONNREFUSED: case -ENOTCONN: @@ -1168,6 +1169,11 @@ call_timeout(struct rpc_task *task) clnt->cl_protname, clnt->cl_server); } rpc_force_rebind(clnt); + /* + * Did our request time out due to an RPCSEC_GSS out-of-sequence + * event? RFC2203 requires the server to drop all such requests. + */ + rpcauth_invalcred(task); retry: clnt->cl_stats->rpcretrans++; @@ -1195,18 +1201,6 @@ call_decode(struct rpc_task *task) task->tk_flags &= ~RPC_CALL_MAJORSEEN; } - if (task->tk_status < 12) { - if (!RPC_IS_SOFT(task)) { - task->tk_action = call_bind; - clnt->cl_stats->rpcretrans++; - goto out_retry; - } - dprintk("RPC: %s: too small RPC reply size (%d bytes)\n", - clnt->cl_protname, task->tk_status); - task->tk_action = call_timeout; - goto out_retry; - } - /* * Ensure that we see all writes made by xprt_complete_rqst() * before it changed req->rq_received. @@ -1218,6 +1212,18 @@ call_decode(struct rpc_task *task) WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf, sizeof(req->rq_rcv_buf)) != 0); + if (req->rq_rcv_buf.len < 12) { + if (!RPC_IS_SOFT(task)) { + task->tk_action = call_bind; + clnt->cl_stats->rpcretrans++; + goto out_retry; + } + dprintk("RPC: %s: too small RPC reply size (%d bytes)\n", + clnt->cl_protname, task->tk_status); + task->tk_action = call_timeout; + goto out_retry; + } + /* Verify the RPC header */ p = call_verify(task); if (IS_ERR(p)) { @@ -1236,10 +1242,14 @@ call_decode(struct rpc_task *task) task->tk_status); return; out_retry: - req->rq_received = req->rq_private_buf.len = 0; task->tk_status = 0; - if (task->tk_client->cl_discrtry) - xprt_force_disconnect(task->tk_xprt); + /* Note: call_verify() may have freed the RPC slot */ + if (task->tk_rqstp == req) { + req->rq_received = req->rq_rcv_buf.len = 0; + if (task->tk_client->cl_discrtry) + xprt_conditional_disconnect(task->tk_xprt, + req->rq_connect_cookie); + } } /* @@ -1531,7 +1541,7 @@ void rpc_show_tasks(void) proc = -1; if (RPC_IS_QUEUED(t)) - rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); + rpc_waitq = rpc_qname(t->tk_waitqueue); printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", t->tk_pid, proc, diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 56aa018dce3a..0517967a68bf 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -298,7 +298,7 @@ void rpcb_getport_async(struct rpc_task *task) /* Put self on queue before sending rpcbind request, in case * rpcb_getport_done completes before we return from rpc_run_task */ - rpc_sleep_on(&xprt->binding, task, NULL, NULL); + rpc_sleep_on(&xprt->binding, task, NULL); /* Someone else may have bound if we slept */ if (xprt_bound(xprt)) { diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 4c669121e607..6eab9bf94baf 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -38,9 +38,9 @@ static struct kmem_cache *rpc_buffer_slabp __read_mostly; static mempool_t *rpc_task_mempool __read_mostly; static mempool_t *rpc_buffer_mempool __read_mostly; -static void __rpc_default_timer(struct rpc_task *task); static void rpc_async_schedule(struct work_struct *); static void rpc_release_task(struct rpc_task *task); +static void __rpc_queue_timer_fn(unsigned long ptr); /* * RPC tasks sit here while waiting for conditions to improve. @@ -57,41 +57,30 @@ struct workqueue_struct *rpciod_workqueue; * queue->lock and bh_disabled in order to avoid races within * rpc_run_timer(). */ -static inline void -__rpc_disable_timer(struct rpc_task *task) +static void +__rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task) { + if (task->tk_timeout == 0) + return; dprintk("RPC: %5u disabling timer\n", task->tk_pid); - task->tk_timeout_fn = NULL; task->tk_timeout = 0; + list_del(&task->u.tk_wait.timer_list); + if (list_empty(&queue->timer_list.list)) + del_timer(&queue->timer_list.timer); } -/* - * Run a timeout function. - * We use the callback in order to allow __rpc_wake_up_task() - * and friends to disable the timer synchronously on SMP systems - * without calling del_timer_sync(). The latter could cause a - * deadlock if called while we're holding spinlocks... - */ -static void rpc_run_timer(struct rpc_task *task) +static void +rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires) { - void (*callback)(struct rpc_task *); - - callback = task->tk_timeout_fn; - task->tk_timeout_fn = NULL; - if (callback && RPC_IS_QUEUED(task)) { - dprintk("RPC: %5u running timer\n", task->tk_pid); - callback(task); - } - smp_mb__before_clear_bit(); - clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); - smp_mb__after_clear_bit(); + queue->timer_list.expires = expires; + mod_timer(&queue->timer_list.timer, expires); } /* * Set up a timer for the current task. */ -static inline void -__rpc_add_timer(struct rpc_task *task, rpc_action timer) +static void +__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) { if (!task->tk_timeout) return; @@ -99,27 +88,10 @@ __rpc_add_timer(struct rpc_task *task, rpc_action timer) dprintk("RPC: %5u setting alarm for %lu ms\n", task->tk_pid, task->tk_timeout * 1000 / HZ); - if (timer) - task->tk_timeout_fn = timer; - else - task->tk_timeout_fn = __rpc_default_timer; - set_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); - mod_timer(&task->tk_timer, jiffies + task->tk_timeout); -} - -/* - * Delete any timer for the current task. Because we use del_timer_sync(), - * this function should never be called while holding queue->lock. - */ -static void -rpc_delete_timer(struct rpc_task *task) -{ - if (RPC_IS_QUEUED(task)) - return; - if (test_and_clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate)) { - del_singleshot_timer_sync(&task->tk_timer); - dprintk("RPC: %5u deleting timer\n", task->tk_pid); - } + task->u.tk_wait.expires = jiffies + task->tk_timeout; + if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires)) + rpc_set_queue_timer(queue, task->u.tk_wait.expires); + list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); } /* @@ -161,7 +133,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task * list_add(&task->u.tk_wait.list, &queue->tasks[0]); else list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); - task->u.tk_wait.rpc_waitq = queue; + task->tk_waitqueue = queue; queue->qlen++; rpc_set_queued(task); @@ -181,22 +153,18 @@ static void __rpc_remove_wait_queue_priority(struct rpc_task *task) list_move(&t->u.tk_wait.list, &task->u.tk_wait.list); list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links); } - list_del(&task->u.tk_wait.list); } /* * Remove request from queue. * Note: must be called with spin lock held. */ -static void __rpc_remove_wait_queue(struct rpc_task *task) +static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) { - struct rpc_wait_queue *queue; - queue = task->u.tk_wait.rpc_waitq; - + __rpc_disable_timer(queue, task); if (RPC_IS_PRIORITY(queue)) __rpc_remove_wait_queue_priority(task); - else - list_del(&task->u.tk_wait.list); + list_del(&task->u.tk_wait.list); queue->qlen--; dprintk("RPC: %5u removed from queue %p \"%s\"\n", task->tk_pid, queue, rpc_qname(queue)); @@ -229,6 +197,9 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c INIT_LIST_HEAD(&queue->tasks[i]); queue->maxpriority = nr_queues - 1; rpc_reset_waitqueue_priority(queue); + queue->qlen = 0; + setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue); + INIT_LIST_HEAD(&queue->timer_list.list); #ifdef RPC_DEBUG queue->name = qname; #endif @@ -245,6 +216,12 @@ void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname) } EXPORT_SYMBOL_GPL(rpc_init_wait_queue); +void rpc_destroy_wait_queue(struct rpc_wait_queue *queue) +{ + del_timer_sync(&queue->timer_list.timer); +} +EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue); + static int rpc_wait_bit_killable(void *word) { if (fatal_signal_pending(current)) @@ -313,7 +290,6 @@ EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); */ static void rpc_make_runnable(struct rpc_task *task) { - BUG_ON(task->tk_timeout_fn); rpc_clear_queued(task); if (rpc_test_and_set_running(task)) return; @@ -326,7 +302,7 @@ static void rpc_make_runnable(struct rpc_task *task) int status; INIT_WORK(&task->u.tk_work, rpc_async_schedule); - status = queue_work(task->tk_workqueue, &task->u.tk_work); + status = queue_work(rpciod_workqueue, &task->u.tk_work); if (status < 0) { printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); task->tk_status = status; @@ -343,7 +319,7 @@ static void rpc_make_runnable(struct rpc_task *task) * as it's on a wait queue. */ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, - rpc_action action, rpc_action timer) + rpc_action action) { dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", task->tk_pid, rpc_qname(q), jiffies); @@ -357,11 +333,11 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, BUG_ON(task->tk_callback != NULL); task->tk_callback = action; - __rpc_add_timer(task, timer); + __rpc_add_timer(q, task); } void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, - rpc_action action, rpc_action timer) + rpc_action action) { /* Mark the task as being activated if so needed */ rpc_set_active(task); @@ -370,18 +346,19 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, * Protect the queue operations. */ spin_lock_bh(&q->lock); - __rpc_sleep_on(q, task, action, timer); + __rpc_sleep_on(q, task, action); spin_unlock_bh(&q->lock); } EXPORT_SYMBOL_GPL(rpc_sleep_on); /** * __rpc_do_wake_up_task - wake up a single rpc_task + * @queue: wait queue * @task: task to be woken up * * Caller must hold queue->lock, and have cleared the task queued flag. */ -static void __rpc_do_wake_up_task(struct rpc_task *task) +static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task *task) { dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n", task->tk_pid, jiffies); @@ -395,8 +372,7 @@ static void __rpc_do_wake_up_task(struct rpc_task *task) return; } - __rpc_disable_timer(task); - __rpc_remove_wait_queue(task); + __rpc_remove_wait_queue(queue, task); rpc_make_runnable(task); @@ -404,48 +380,32 @@ static void __rpc_do_wake_up_task(struct rpc_task *task) } /* - * Wake up the specified task + * Wake up a queued task while the queue lock is being held */ -static void __rpc_wake_up_task(struct rpc_task *task) +static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (rpc_start_wakeup(task)) { - if (RPC_IS_QUEUED(task)) - __rpc_do_wake_up_task(task); - rpc_finish_wakeup(task); - } + if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue) + __rpc_do_wake_up_task(queue, task); } /* - * Default timeout handler if none specified by user + * Wake up a task on a specific queue */ -static void -__rpc_default_timer(struct rpc_task *task) +void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task) { - dprintk("RPC: %5u timeout (default timer)\n", task->tk_pid); - task->tk_status = -ETIMEDOUT; - rpc_wake_up_task(task); + spin_lock_bh(&queue->lock); + rpc_wake_up_task_queue_locked(queue, task); + spin_unlock_bh(&queue->lock); } +EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task); /* * Wake up the specified task */ -void rpc_wake_up_task(struct rpc_task *task) +static void rpc_wake_up_task(struct rpc_task *task) { - rcu_read_lock_bh(); - if (rpc_start_wakeup(task)) { - if (RPC_IS_QUEUED(task)) { - struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq; - - /* Note: we're already in a bh-safe context */ - spin_lock(&queue->lock); - __rpc_do_wake_up_task(task); - spin_unlock(&queue->lock); - } - rpc_finish_wakeup(task); - } - rcu_read_unlock_bh(); + rpc_wake_up_queued_task(task->tk_waitqueue, task); } -EXPORT_SYMBOL_GPL(rpc_wake_up_task); /* * Wake up the next task on a priority queue. @@ -495,7 +455,7 @@ new_queue: new_owner: rpc_set_waitqueue_owner(queue, task->tk_owner); out: - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); return task; } @@ -508,16 +468,14 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue) dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue)); - rcu_read_lock_bh(); - spin_lock(&queue->lock); + spin_lock_bh(&queue->lock); if (RPC_IS_PRIORITY(queue)) task = __rpc_wake_up_next_priority(queue); else { task_for_first(task, &queue->tasks[0]) - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); } - spin_unlock(&queue->lock); - rcu_read_unlock_bh(); + spin_unlock_bh(&queue->lock); return task; } @@ -534,18 +492,16 @@ void rpc_wake_up(struct rpc_wait_queue *queue) struct rpc_task *task, *next; struct list_head *head; - rcu_read_lock_bh(); - spin_lock(&queue->lock); + spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { list_for_each_entry_safe(task, next, head, u.tk_wait.list) - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); if (head == &queue->tasks[0]) break; head--; } - spin_unlock(&queue->lock); - rcu_read_unlock_bh(); + spin_unlock_bh(&queue->lock); } EXPORT_SYMBOL_GPL(rpc_wake_up); @@ -561,26 +517,48 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) struct rpc_task *task, *next; struct list_head *head; - rcu_read_lock_bh(); - spin_lock(&queue->lock); + spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { list_for_each_entry_safe(task, next, head, u.tk_wait.list) { task->tk_status = status; - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); } if (head == &queue->tasks[0]) break; head--; } - spin_unlock(&queue->lock); - rcu_read_unlock_bh(); + spin_unlock_bh(&queue->lock); } EXPORT_SYMBOL_GPL(rpc_wake_up_status); +static void __rpc_queue_timer_fn(unsigned long ptr) +{ + struct rpc_wait_queue *queue = (struct rpc_wait_queue *)ptr; + struct rpc_task *task, *n; + unsigned long expires, now, timeo; + + spin_lock(&queue->lock); + expires = now = jiffies; + list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) { + timeo = task->u.tk_wait.expires; + if (time_after_eq(now, timeo)) { + dprintk("RPC: %5u timeout\n", task->tk_pid); + task->tk_status = -ETIMEDOUT; + rpc_wake_up_task_queue_locked(queue, task); + continue; + } + if (expires == now || time_after(expires, timeo)) + expires = timeo; + } + if (!list_empty(&queue->timer_list.list)) + rpc_set_queue_timer(queue, expires); + spin_unlock(&queue->lock); +} + static void __rpc_atrun(struct rpc_task *task) { - rpc_wake_up_task(task); + task->tk_status = 0; } /* @@ -589,7 +567,7 @@ static void __rpc_atrun(struct rpc_task *task) void rpc_delay(struct rpc_task *task, unsigned long delay) { task->tk_timeout = delay; - rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun); + rpc_sleep_on(&delay_queue, task, __rpc_atrun); } EXPORT_SYMBOL_GPL(rpc_delay); @@ -644,10 +622,6 @@ static void __rpc_execute(struct rpc_task *task) BUG_ON(RPC_IS_QUEUED(task)); for (;;) { - /* - * Garbage collection of pending timers... - */ - rpc_delete_timer(task); /* * Execute any pending callback. @@ -816,8 +790,6 @@ EXPORT_SYMBOL_GPL(rpc_free); static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setup_data) { memset(task, 0, sizeof(*task)); - setup_timer(&task->tk_timer, (void (*)(unsigned long))rpc_run_timer, - (unsigned long)task); atomic_set(&task->tk_count, 1); task->tk_flags = task_setup_data->flags; task->tk_ops = task_setup_data->callback_ops; @@ -832,7 +804,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta task->tk_owner = current->tgid; /* Initialize workqueue for async tasks */ - task->tk_workqueue = rpciod_workqueue; + task->tk_workqueue = task_setup_data->workqueue; task->tk_client = task_setup_data->rpc_client; if (task->tk_client != NULL) { @@ -845,12 +817,11 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta task->tk_action = rpc_prepare_task; if (task_setup_data->rpc_message != NULL) { - memcpy(&task->tk_msg, task_setup_data->rpc_message, sizeof(task->tk_msg)); + task->tk_msg.rpc_proc = task_setup_data->rpc_message->rpc_proc; + task->tk_msg.rpc_argp = task_setup_data->rpc_message->rpc_argp; + task->tk_msg.rpc_resp = task_setup_data->rpc_message->rpc_resp; /* Bind the user cred */ - if (task->tk_msg.rpc_cred != NULL) - rpcauth_holdcred(task); - else - rpcauth_bindcred(task); + rpcauth_bindcred(task, task_setup_data->rpc_message->rpc_cred, task_setup_data->flags); if (task->tk_action == NULL) rpc_call_start(task); } @@ -868,13 +839,6 @@ rpc_alloc_task(void) return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); } -static void rpc_free_task(struct rcu_head *rcu) -{ - struct rpc_task *task = container_of(rcu, struct rpc_task, u.tk_rcu); - dprintk("RPC: %5u freeing task\n", task->tk_pid); - mempool_free(task, rpc_task_mempool); -} - /* * Create a new task for the specified client. */ @@ -898,12 +862,25 @@ out: return task; } - -void rpc_put_task(struct rpc_task *task) +static void rpc_free_task(struct rpc_task *task) { const struct rpc_call_ops *tk_ops = task->tk_ops; void *calldata = task->tk_calldata; + if (task->tk_flags & RPC_TASK_DYNAMIC) { + dprintk("RPC: %5u freeing task\n", task->tk_pid); + mempool_free(task, rpc_task_mempool); + } + rpc_release_calldata(tk_ops, calldata); +} + +static void rpc_async_release(struct work_struct *work) +{ + rpc_free_task(container_of(work, struct rpc_task, u.tk_work)); +} + +void rpc_put_task(struct rpc_task *task) +{ if (!atomic_dec_and_test(&task->tk_count)) return; /* Release resources */ @@ -915,9 +892,11 @@ void rpc_put_task(struct rpc_task *task) rpc_release_client(task->tk_client); task->tk_client = NULL; } - if (task->tk_flags & RPC_TASK_DYNAMIC) - call_rcu_bh(&task->u.tk_rcu, rpc_free_task); - rpc_release_calldata(tk_ops, calldata); + if (task->tk_workqueue != NULL) { + INIT_WORK(&task->u.tk_work, rpc_async_release); + queue_work(task->tk_workqueue, &task->u.tk_work); + } else + rpc_free_task(task); } EXPORT_SYMBOL_GPL(rpc_put_task); @@ -937,9 +916,6 @@ static void rpc_release_task(struct rpc_task *task) } BUG_ON (RPC_IS_QUEUED(task)); - /* Synchronously delete any running timer */ - rpc_delete_timer(task); - #ifdef RPC_DEBUG task->tk_magic = 0; #endif @@ -1029,11 +1005,20 @@ rpc_destroy_mempool(void) kmem_cache_destroy(rpc_task_slabp); if (rpc_buffer_slabp) kmem_cache_destroy(rpc_buffer_slabp); + rpc_destroy_wait_queue(&delay_queue); } int rpc_init_mempool(void) { + /* + * The following is not strictly a mempool initialisation, + * but there is no harm in doing it here + */ + rpc_init_wait_queue(&delay_queue, "delayq"); + if (!rpciod_start()) + goto err_nomem; + rpc_task_slabp = kmem_cache_create("rpc_tasks", sizeof(struct rpc_task), 0, SLAB_HWCACHE_ALIGN, @@ -1054,13 +1039,6 @@ rpc_init_mempool(void) rpc_buffer_slabp); if (!rpc_buffer_mempool) goto err_nomem; - if (!rpciod_start()) - goto err_nomem; - /* - * The following is not strictly a mempool initialisation, - * but there is no harm in doing it here - */ - rpc_init_wait_queue(&delay_queue, "delayq"); return 0; err_nomem: rpc_destroy_mempool(); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index a290e1523297..d74c2d269539 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -301,7 +301,6 @@ static inline int svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask) { struct svc_pool_map *m = &svc_pool_map; - unsigned int node; /* or cpu */ /* * The caller checks for sv_nrpools > 1, which @@ -314,16 +313,23 @@ svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask) default: return 0; case SVC_POOL_PERCPU: - node = m->pool_to[pidx]; + { + unsigned int cpu = m->pool_to[pidx]; + *oldmask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(node)); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); return 1; + } case SVC_POOL_PERNODE: - node = m->pool_to[pidx]; + { + unsigned int node = m->pool_to[pidx]; + node_to_cpumask_ptr(nodecpumask, node); + *oldmask = current->cpus_allowed; - set_cpus_allowed(current, node_to_cpumask(node)); + set_cpus_allowed_ptr(current, nodecpumask); return 1; } + } } /* @@ -504,8 +510,7 @@ EXPORT_SYMBOL(svc_destroy); static int svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) { - int pages; - int arghi; + unsigned int pages, arghi; pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply. * We assume one is at most one page @@ -519,7 +524,7 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) rqstp->rq_pages[arghi++] = p; pages--; } - return ! pages; + return pages == 0; } /* @@ -528,8 +533,9 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) static void svc_release_buffer(struct svc_rqst *rqstp) { - int i; - for (i=0; i<ARRAY_SIZE(rqstp->rq_pages); i++) + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(rqstp->rq_pages); i++) if (rqstp->rq_pages[i]) put_page(rqstp->rq_pages[i]); } @@ -584,7 +590,7 @@ __svc_create_thread(svc_thread_fn func, struct svc_serv *serv, struct svc_rqst *rqstp; int error = -ENOMEM; int have_oldmask = 0; - cpumask_t oldmask; + cpumask_t uninitialized_var(oldmask); rqstp = svc_prepare_thread(serv, pool); if (IS_ERR(rqstp)) { @@ -613,16 +619,6 @@ out_thread: } /* - * Create a thread in the default pool. Caller must hold BKL. - */ -int -svc_create_thread(svc_thread_fn func, struct svc_serv *serv) -{ - return __svc_create_thread(func, serv, &serv->sv_pools[0]); -} -EXPORT_SYMBOL(svc_create_thread); - -/* * Choose a pool in which to create a new thread, for svc_set_num_threads */ static inline struct svc_pool * @@ -915,8 +911,7 @@ svc_process(struct svc_rqst *rqstp) case SVC_OK: break; case SVC_GARBAGE: - rpc_stat = rpc_garbage_args; - goto err_bad; + goto err_garbage; case SVC_SYSERR: rpc_stat = rpc_system_err; goto err_bad; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 332eb47539e1..d8e8d79a8451 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -18,6 +18,7 @@ #include <linux/skbuff.h> #include <linux/file.h> #include <linux/freezer.h> +#include <linux/kthread.h> #include <net/sock.h> #include <net/checksum.h> #include <net/ip.h> @@ -586,8 +587,12 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) while (rqstp->rq_pages[i] == NULL) { struct page *p = alloc_page(GFP_KERNEL); if (!p) { - int j = msecs_to_jiffies(500); - schedule_timeout_uninterruptible(j); + set_current_state(TASK_INTERRUPTIBLE); + if (signalled() || kthread_should_stop()) { + set_current_state(TASK_RUNNING); + return -EINTR; + } + schedule_timeout(msecs_to_jiffies(500)); } rqstp->rq_pages[i] = p; } @@ -607,7 +612,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) try_to_freeze(); cond_resched(); - if (signalled()) + if (signalled() || kthread_should_stop()) return -EINTR; spin_lock_bh(&pool->sp_lock); @@ -626,6 +631,20 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) * to bring down the daemons ... */ set_current_state(TASK_INTERRUPTIBLE); + + /* + * checking kthread_should_stop() here allows us to avoid + * locking and signalling when stopping kthreads that call + * svc_recv. If the thread has already been woken up, then + * we can exit here without sleeping. If not, then it + * it'll be woken up quickly during the schedule_timeout + */ + if (kthread_should_stop()) { + set_current_state(TASK_RUNNING); + spin_unlock_bh(&pool->sp_lock); + return -EINTR; + } + add_wait_queue(&rqstp->rq_wait, &wait); spin_unlock_bh(&pool->sp_lock); @@ -641,7 +660,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) svc_thread_dequeue(pool, rqstp); spin_unlock_bh(&pool->sp_lock); dprintk("svc: server %p, no data yet\n", rqstp); - return signalled()? -EINTR : -EAGAIN; + if (signalled() || kthread_should_stop()) + return -EINTR; + else + return -EAGAIN; } } spin_unlock_bh(&pool->sp_lock); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 3c64051e4555..3f30ee6006ae 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -11,7 +11,8 @@ #include <linux/hash.h> #include <linux/string.h> #include <net/sock.h> - +#include <net/ipv6.h> +#include <linux/kernel.h> #define RPCDBG_FACILITY RPCDBG_AUTH @@ -85,7 +86,7 @@ static void svcauth_unix_domain_release(struct auth_domain *dom) struct ip_map { struct cache_head h; char m_class[8]; /* e.g. "nfsd" */ - struct in_addr m_addr; + struct in6_addr m_addr; struct unix_domain *m_client; int m_add_change; }; @@ -113,12 +114,19 @@ static inline int hash_ip(__be32 ip) return (hash ^ (hash>>8)) & 0xff; } #endif +static inline int hash_ip6(struct in6_addr ip) +{ + return (hash_ip(ip.s6_addr32[0]) ^ + hash_ip(ip.s6_addr32[1]) ^ + hash_ip(ip.s6_addr32[2]) ^ + hash_ip(ip.s6_addr32[3])); +} static int ip_map_match(struct cache_head *corig, struct cache_head *cnew) { struct ip_map *orig = container_of(corig, struct ip_map, h); struct ip_map *new = container_of(cnew, struct ip_map, h); return strcmp(orig->m_class, new->m_class) == 0 - && orig->m_addr.s_addr == new->m_addr.s_addr; + && ipv6_addr_equal(&orig->m_addr, &new->m_addr); } static void ip_map_init(struct cache_head *cnew, struct cache_head *citem) { @@ -126,7 +134,7 @@ static void ip_map_init(struct cache_head *cnew, struct cache_head *citem) struct ip_map *item = container_of(citem, struct ip_map, h); strcpy(new->m_class, item->m_class); - new->m_addr.s_addr = item->m_addr.s_addr; + ipv6_addr_copy(&new->m_addr, &item->m_addr); } static void update(struct cache_head *cnew, struct cache_head *citem) { @@ -150,22 +158,24 @@ static void ip_map_request(struct cache_detail *cd, struct cache_head *h, char **bpp, int *blen) { - char text_addr[20]; + char text_addr[40]; struct ip_map *im = container_of(h, struct ip_map, h); - __be32 addr = im->m_addr.s_addr; - - snprintf(text_addr, 20, "%u.%u.%u.%u", - ntohl(addr) >> 24 & 0xff, - ntohl(addr) >> 16 & 0xff, - ntohl(addr) >> 8 & 0xff, - ntohl(addr) >> 0 & 0xff); + if (ipv6_addr_v4mapped(&(im->m_addr))) { + snprintf(text_addr, 20, NIPQUAD_FMT, + ntohl(im->m_addr.s6_addr32[3]) >> 24 & 0xff, + ntohl(im->m_addr.s6_addr32[3]) >> 16 & 0xff, + ntohl(im->m_addr.s6_addr32[3]) >> 8 & 0xff, + ntohl(im->m_addr.s6_addr32[3]) >> 0 & 0xff); + } else { + snprintf(text_addr, 40, NIP6_FMT, NIP6(im->m_addr)); + } qword_add(bpp, blen, im->m_class); qword_add(bpp, blen, text_addr); (*bpp)[-1] = '\n'; } -static struct ip_map *ip_map_lookup(char *class, struct in_addr addr); +static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr); static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry); static int ip_map_parse(struct cache_detail *cd, @@ -176,10 +186,10 @@ static int ip_map_parse(struct cache_detail *cd, * for scratch: */ char *buf = mesg; int len; - int b1,b2,b3,b4; + int b1, b2, b3, b4, b5, b6, b7, b8; char c; char class[8]; - struct in_addr addr; + struct in6_addr addr; int err; struct ip_map *ipmp; @@ -198,7 +208,23 @@ static int ip_map_parse(struct cache_detail *cd, len = qword_get(&mesg, buf, mlen); if (len <= 0) return -EINVAL; - if (sscanf(buf, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4) + if (sscanf(buf, NIPQUAD_FMT "%c", &b1, &b2, &b3, &b4, &c) == 4) { + addr.s6_addr32[0] = 0; + addr.s6_addr32[1] = 0; + addr.s6_addr32[2] = htonl(0xffff); + addr.s6_addr32[3] = + htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); + } else if (sscanf(buf, NIP6_FMT "%c", + &b1, &b2, &b3, &b4, &b5, &b6, &b7, &b8, &c) == 8) { + addr.s6_addr16[0] = htons(b1); + addr.s6_addr16[1] = htons(b2); + addr.s6_addr16[2] = htons(b3); + addr.s6_addr16[3] = htons(b4); + addr.s6_addr16[4] = htons(b5); + addr.s6_addr16[5] = htons(b6); + addr.s6_addr16[6] = htons(b7); + addr.s6_addr16[7] = htons(b8); + } else return -EINVAL; expiry = get_expiry(&mesg); @@ -216,10 +242,7 @@ static int ip_map_parse(struct cache_detail *cd, } else dom = NULL; - addr.s_addr = - htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); - - ipmp = ip_map_lookup(class,addr); + ipmp = ip_map_lookup(class, &addr); if (ipmp) { err = ip_map_update(ipmp, container_of(dom, struct unix_domain, h), @@ -239,7 +262,7 @@ static int ip_map_show(struct seq_file *m, struct cache_head *h) { struct ip_map *im; - struct in_addr addr; + struct in6_addr addr; char *dom = "-no-domain-"; if (h == NULL) { @@ -248,20 +271,24 @@ static int ip_map_show(struct seq_file *m, } im = container_of(h, struct ip_map, h); /* class addr domain */ - addr = im->m_addr; + ipv6_addr_copy(&addr, &im->m_addr); if (test_bit(CACHE_VALID, &h->flags) && !test_bit(CACHE_NEGATIVE, &h->flags)) dom = im->m_client->h.name; - seq_printf(m, "%s %d.%d.%d.%d %s\n", - im->m_class, - ntohl(addr.s_addr) >> 24 & 0xff, - ntohl(addr.s_addr) >> 16 & 0xff, - ntohl(addr.s_addr) >> 8 & 0xff, - ntohl(addr.s_addr) >> 0 & 0xff, - dom - ); + if (ipv6_addr_v4mapped(&addr)) { + seq_printf(m, "%s" NIPQUAD_FMT "%s\n", + im->m_class, + ntohl(addr.s6_addr32[3]) >> 24 & 0xff, + ntohl(addr.s6_addr32[3]) >> 16 & 0xff, + ntohl(addr.s6_addr32[3]) >> 8 & 0xff, + ntohl(addr.s6_addr32[3]) >> 0 & 0xff, + dom); + } else { + seq_printf(m, "%s" NIP6_FMT "%s\n", + im->m_class, NIP6(addr), dom); + } return 0; } @@ -281,16 +308,16 @@ struct cache_detail ip_map_cache = { .alloc = ip_map_alloc, }; -static struct ip_map *ip_map_lookup(char *class, struct in_addr addr) +static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr) { struct ip_map ip; struct cache_head *ch; strcpy(ip.m_class, class); - ip.m_addr = addr; + ipv6_addr_copy(&ip.m_addr, addr); ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h, hash_str(class, IP_HASHBITS) ^ - hash_ip(addr.s_addr)); + hash_ip6(*addr)); if (ch) return container_of(ch, struct ip_map, h); @@ -319,14 +346,14 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex ch = sunrpc_cache_update(&ip_map_cache, &ip.h, &ipm->h, hash_str(ipm->m_class, IP_HASHBITS) ^ - hash_ip(ipm->m_addr.s_addr)); + hash_ip6(ipm->m_addr)); if (!ch) return -ENOMEM; cache_put(ch, &ip_map_cache); return 0; } -int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom) +int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom) { struct unix_domain *udom; struct ip_map *ipmp; @@ -355,7 +382,7 @@ int auth_unix_forget_old(struct auth_domain *dom) } EXPORT_SYMBOL(auth_unix_forget_old); -struct auth_domain *auth_unix_lookup(struct in_addr addr) +struct auth_domain *auth_unix_lookup(struct in6_addr *addr) { struct ip_map *ipm; struct auth_domain *rv; @@ -650,9 +677,24 @@ static int unix_gid_find(uid_t uid, struct group_info **gip, int svcauth_unix_set_client(struct svc_rqst *rqstp) { - struct sockaddr_in *sin = svc_addr_in(rqstp); + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6, sin6_storage; struct ip_map *ipm; + switch (rqstp->rq_addr.ss_family) { + case AF_INET: + sin = svc_addr_in(rqstp); + sin6 = &sin6_storage; + ipv6_addr_set(&sin6->sin6_addr, 0, 0, + htonl(0x0000FFFF), sin->sin_addr.s_addr); + break; + case AF_INET6: + sin6 = svc_addr_in6(rqstp); + break; + default: + BUG(); + } + rqstp->rq_client = NULL; if (rqstp->rq_proc == 0) return SVC_OK; @@ -660,7 +702,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) ipm = ip_map_cached_get(rqstp); if (ipm == NULL) ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class, - sin->sin_addr); + &sin6->sin6_addr); if (ipm == NULL) return SVC_DENIED; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c475977de05a..3e65719f1ef6 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -38,6 +38,7 @@ #include <net/checksum.h> #include <net/ip.h> #include <net/ipv6.h> +#include <net/tcp.h> #include <net/tcp_states.h> #include <asm/uaccess.h> #include <asm/ioctls.h> @@ -45,6 +46,7 @@ #include <linux/sunrpc/types.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/msg_prot.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/stats.h> @@ -822,8 +824,8 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) * the next four bytes. Otherwise try to gobble up as much as * possible up to the complete record length. */ - if (svsk->sk_tcplen < 4) { - unsigned long want = 4 - svsk->sk_tcplen; + if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { + int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; struct kvec iov; iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; @@ -833,32 +835,31 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) svsk->sk_tcplen += len; if (len < want) { - dprintk("svc: short recvfrom while reading record length (%d of %lu)\n", - len, want); + dprintk("svc: short recvfrom while reading record " + "length (%d of %d)\n", len, want); svc_xprt_received(&svsk->sk_xprt); return -EAGAIN; /* record header not complete */ } svsk->sk_reclen = ntohl(svsk->sk_reclen); - if (!(svsk->sk_reclen & 0x80000000)) { + if (!(svsk->sk_reclen & RPC_LAST_STREAM_FRAGMENT)) { /* FIXME: technically, a record can be fragmented, * and non-terminal fragments will not have the top * bit set in the fragment length header. * But apparently no known nfs clients send fragmented * records. */ if (net_ratelimit()) - printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx" - " (non-terminal)\n", - (unsigned long) svsk->sk_reclen); + printk(KERN_NOTICE "RPC: multiple fragments " + "per record not supported\n"); goto err_delete; } - svsk->sk_reclen &= 0x7fffffff; + svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK; dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); if (svsk->sk_reclen > serv->sv_max_mesg) { if (net_ratelimit()) - printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx" - " (large)\n", - (unsigned long) svsk->sk_reclen); + printk(KERN_NOTICE "RPC: " + "fragment too large: 0x%08lx\n", + (unsigned long)svsk->sk_reclen); goto err_delete; } } @@ -1045,7 +1046,6 @@ void svc_cleanup_xprt_sock(void) static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) { struct sock *sk = svsk->sk_sk; - struct tcp_sock *tp = tcp_sk(sk); svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv); set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); @@ -1063,7 +1063,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svsk->sk_reclen = 0; svsk->sk_tcplen = 0; - tp->nonagle = 1; /* disable Nagle's algorithm */ + tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; /* initialise setting must have enough space to * receive and respond to one request. @@ -1101,6 +1101,7 @@ void svc_sock_update_bufs(struct svc_serv *serv) } spin_unlock_bh(&serv->sv_lock); } +EXPORT_SYMBOL(svc_sock_update_bufs); /* * Initialize socket for RPC use and create svc_sock struct diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 61880cc90e86..e1770f7ba0b3 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -188,9 +188,9 @@ out_sleep: task->tk_timeout = 0; task->tk_status = -EAGAIN; if (req && req->rq_ntrans) - rpc_sleep_on(&xprt->resend, task, NULL, NULL); + rpc_sleep_on(&xprt->resend, task, NULL); else - rpc_sleep_on(&xprt->sending, task, NULL, NULL); + rpc_sleep_on(&xprt->sending, task, NULL); return 0; } EXPORT_SYMBOL_GPL(xprt_reserve_xprt); @@ -238,9 +238,9 @@ out_sleep: task->tk_timeout = 0; task->tk_status = -EAGAIN; if (req && req->rq_ntrans) - rpc_sleep_on(&xprt->resend, task, NULL, NULL); + rpc_sleep_on(&xprt->resend, task, NULL); else - rpc_sleep_on(&xprt->sending, task, NULL, NULL); + rpc_sleep_on(&xprt->sending, task, NULL); return 0; } EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); @@ -447,13 +447,13 @@ EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks); * @task: task to be put to sleep * @action: function pointer to be executed after wait */ -void xprt_wait_for_buffer_space(struct rpc_task *task) +void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; task->tk_timeout = req->rq_timeout; - rpc_sleep_on(&xprt->pending, task, NULL, NULL); + rpc_sleep_on(&xprt->pending, task, action); } EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space); @@ -472,7 +472,7 @@ void xprt_write_space(struct rpc_xprt *xprt) if (xprt->snd_task) { dprintk("RPC: write space: waking waiting task on " "xprt %p\n", xprt); - rpc_wake_up_task(xprt->snd_task); + rpc_wake_up_queued_task(&xprt->pending, xprt->snd_task); } spin_unlock_bh(&xprt->transport_lock); } @@ -602,11 +602,37 @@ void xprt_force_disconnect(struct rpc_xprt *xprt) /* Try to schedule an autoclose RPC call */ if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) queue_work(rpciod_workqueue, &xprt->task_cleanup); - else if (xprt->snd_task != NULL) - rpc_wake_up_task(xprt->snd_task); + xprt_wake_pending_tasks(xprt, -ENOTCONN); + spin_unlock_bh(&xprt->transport_lock); +} + +/** + * xprt_conditional_disconnect - force a transport to disconnect + * @xprt: transport to disconnect + * @cookie: 'connection cookie' + * + * This attempts to break the connection if and only if 'cookie' matches + * the current transport 'connection cookie'. It ensures that we don't + * try to break the connection more than once when we need to retransmit + * a batch of RPC requests. + * + */ +void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie) +{ + /* Don't race with the test_bit() in xprt_clear_locked() */ + spin_lock_bh(&xprt->transport_lock); + if (cookie != xprt->connect_cookie) + goto out; + if (test_bit(XPRT_CLOSING, &xprt->state) || !xprt_connected(xprt)) + goto out; + set_bit(XPRT_CLOSE_WAIT, &xprt->state); + /* Try to schedule an autoclose RPC call */ + if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) + queue_work(rpciod_workqueue, &xprt->task_cleanup); + xprt_wake_pending_tasks(xprt, -ENOTCONN); +out: spin_unlock_bh(&xprt->transport_lock); } -EXPORT_SYMBOL_GPL(xprt_force_disconnect); static void xprt_init_autodisconnect(unsigned long data) @@ -653,7 +679,7 @@ void xprt_connect(struct rpc_task *task) task->tk_rqstp->rq_bytes_sent = 0; task->tk_timeout = xprt->connect_timeout; - rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); + rpc_sleep_on(&xprt->pending, task, xprt_connect_status); xprt->stat.connect_start = jiffies; xprt->ops->connect(task); } @@ -749,18 +775,20 @@ EXPORT_SYMBOL_GPL(xprt_update_rtt); void xprt_complete_rqst(struct rpc_task *task, int copied) { struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; dprintk("RPC: %5u xid %08x complete (%d bytes received)\n", task->tk_pid, ntohl(req->rq_xid), copied); - task->tk_xprt->stat.recvs++; + xprt->stat.recvs++; task->tk_rtt = (long)jiffies - req->rq_xtime; list_del_init(&req->rq_list); + req->rq_private_buf.len = copied; /* Ensure all writes are done before we update req->rq_received */ smp_wmb(); - req->rq_received = req->rq_private_buf.len = copied; - rpc_wake_up_task(task); + req->rq_received = copied; + rpc_wake_up_queued_task(&xprt->pending, task); } EXPORT_SYMBOL_GPL(xprt_complete_rqst); @@ -769,17 +797,17 @@ static void xprt_timer(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; + if (task->tk_status != -ETIMEDOUT) + return; dprintk("RPC: %5u xprt_timer\n", task->tk_pid); - spin_lock(&xprt->transport_lock); + spin_lock_bh(&xprt->transport_lock); if (!req->rq_received) { if (xprt->ops->timer) xprt->ops->timer(task); - task->tk_status = -ETIMEDOUT; - } - task->tk_timeout = 0; - rpc_wake_up_task(task); - spin_unlock(&xprt->transport_lock); + } else + task->tk_status = 0; + spin_unlock_bh(&xprt->transport_lock); } /** @@ -849,6 +877,7 @@ void xprt_transmit(struct rpc_task *task) } else if (!req->rq_bytes_sent) return; + req->rq_connect_cookie = xprt->connect_cookie; status = xprt->ops->send_request(task); if (status == 0) { dprintk("RPC: %5u xmit complete\n", task->tk_pid); @@ -864,7 +893,7 @@ void xprt_transmit(struct rpc_task *task) if (!xprt_connected(xprt)) task->tk_status = -ENOTCONN; else if (!req->rq_received) - rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); + rpc_sleep_on(&xprt->pending, task, xprt_timer); spin_unlock_bh(&xprt->transport_lock); return; } @@ -875,7 +904,7 @@ void xprt_transmit(struct rpc_task *task) */ task->tk_status = status; if (status == -ECONNREFUSED) - rpc_sleep_on(&xprt->sending, task, NULL, NULL); + rpc_sleep_on(&xprt->sending, task, NULL); } static inline void do_xprt_reserve(struct rpc_task *task) @@ -895,7 +924,7 @@ static inline void do_xprt_reserve(struct rpc_task *task) dprintk("RPC: waiting for request slot\n"); task->tk_status = -EAGAIN; task->tk_timeout = 0; - rpc_sleep_on(&xprt->backlog, task, NULL, NULL); + rpc_sleep_on(&xprt->backlog, task, NULL); } /** @@ -1052,6 +1081,11 @@ static void xprt_destroy(struct kref *kref) xprt->shutdown = 1; del_timer_sync(&xprt->timer); + rpc_destroy_wait_queue(&xprt->binding); + rpc_destroy_wait_queue(&xprt->pending); + rpc_destroy_wait_queue(&xprt->sending); + rpc_destroy_wait_queue(&xprt->resend); + rpc_destroy_wait_queue(&xprt->backlog); /* * Tear down transport state and free the rpc_xprt */ diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 16fd3f6718ff..af408fc12634 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -1036,6 +1036,8 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) wait_event(xprt->sc_send_wait, atomic_read(&xprt->sc_sq_count) < xprt->sc_sq_depth); + if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) + return 0; continue; } /* Bumped used SQ WR count and post */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 613daf8c1ff7..ddbe981ab516 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -136,12 +136,6 @@ static ctl_table sunrpc_table[] = { #endif /* - * How many times to try sending a request on a socket before waiting - * for the socket buffer to clear. - */ -#define XS_SENDMSG_RETRY (10U) - -/* * Time out for an RPC UDP socket connect. UDP socket connects are * synchronous, but we set a timeout anyway in case of resource * exhaustion on the local host. @@ -516,6 +510,14 @@ out: return sent; } +static void xs_nospace_callback(struct rpc_task *task) +{ + struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt); + + transport->inet->sk_write_pending--; + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); +} + /** * xs_nospace - place task on wait queue if transmit was incomplete * @task: task to put to sleep @@ -531,20 +533,27 @@ static void xs_nospace(struct rpc_task *task) task->tk_pid, req->rq_slen - req->rq_bytes_sent, req->rq_slen); - if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { - /* Protect against races with write_space */ - spin_lock_bh(&xprt->transport_lock); - - /* Don't race with disconnect */ - if (!xprt_connected(xprt)) - task->tk_status = -ENOTCONN; - else if (test_bit(SOCK_NOSPACE, &transport->sock->flags)) - xprt_wait_for_buffer_space(task); + /* Protect against races with write_space */ + spin_lock_bh(&xprt->transport_lock); + + /* Don't race with disconnect */ + if (xprt_connected(xprt)) { + if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { + /* + * Notify TCP that we're limited by the application + * window size + */ + set_bit(SOCK_NOSPACE, &transport->sock->flags); + transport->inet->sk_write_pending++; + /* ...and wait for more buffer space */ + xprt_wait_for_buffer_space(task, xs_nospace_callback); + } + } else { + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); + task->tk_status = -ENOTCONN; + } - spin_unlock_bh(&xprt->transport_lock); - } else - /* Keep holding the socket if it is blocked */ - rpc_delay(task, HZ>>4); + spin_unlock_bh(&xprt->transport_lock); } /** @@ -588,19 +597,20 @@ static int xs_udp_send_request(struct rpc_task *task) } switch (status) { + case -EAGAIN: + xs_nospace(task); + break; case -ENETUNREACH: case -EPIPE: case -ECONNREFUSED: /* When the server has died, an ICMP port unreachable message * prompts ECONNREFUSED. */ - break; - case -EAGAIN: - xs_nospace(task); + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); break; default: + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); dprintk("RPC: sendmsg returned unrecognized error %d\n", -status); - break; } return status; @@ -650,7 +660,6 @@ static int xs_tcp_send_request(struct rpc_task *task) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; int status; - unsigned int retry = 0; xs_encode_tcp_record_marker(&req->rq_snd_buf); @@ -681,9 +690,10 @@ static int xs_tcp_send_request(struct rpc_task *task) return 0; } + if (status != 0) + continue; status = -EAGAIN; - if (retry++ > XS_SENDMSG_RETRY) - break; + break; } switch (status) { @@ -695,12 +705,13 @@ static int xs_tcp_send_request(struct rpc_task *task) case -ENOTCONN: case -EPIPE: status = -ENOTCONN; + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); break; default: dprintk("RPC: sendmsg returned unrecognized error %d\n", -status); + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); xs_tcp_shutdown(xprt); - break; } return status; @@ -1073,6 +1084,7 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes) { struct rpc_xprt *xprt; read_descriptor_t rd_desc; + int read; dprintk("RPC: xs_tcp_data_ready...\n"); @@ -1084,8 +1096,10 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes) /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ rd_desc.arg.data = xprt; - rd_desc.count = 65536; - tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); + do { + rd_desc.count = 65536; + read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); + } while (read > 0); out: read_unlock(&sk->sk_callback_lock); } @@ -1128,6 +1142,7 @@ static void xs_tcp_state_change(struct sock *sk) break; case TCP_FIN_WAIT1: /* The client initiated a shutdown of the socket */ + xprt->connect_cookie++; xprt->reestablish_timeout = 0; set_bit(XPRT_CLOSING, &xprt->state); smp_mb__before_clear_bit(); @@ -1140,6 +1155,7 @@ static void xs_tcp_state_change(struct sock *sk) set_bit(XPRT_CLOSING, &xprt->state); xprt_force_disconnect(xprt); case TCP_SYN_SENT: + xprt->connect_cookie++; case TCP_CLOSING: /* * If the server closed down the connection, make sure that @@ -1186,9 +1202,11 @@ static void xs_udp_write_space(struct sock *sk) if (unlikely(!(sock = sk->sk_socket))) goto out; + clear_bit(SOCK_NOSPACE, &sock->flags); + if (unlikely(!(xprt = xprt_from_sock(sk)))) goto out; - if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) + if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0) goto out; xprt_write_space(xprt); @@ -1219,9 +1237,11 @@ static void xs_tcp_write_space(struct sock *sk) if (unlikely(!(sock = sk->sk_socket))) goto out; + clear_bit(SOCK_NOSPACE, &sock->flags); + if (unlikely(!(xprt = xprt_from_sock(sk)))) goto out; - if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) + if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0) goto out; xprt_write_space(xprt); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 63ed69ffad99..e18cd3628db4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -819,7 +819,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) */ mode = S_IFSOCK | (SOCK_INODE(sock)->i_mode & ~current->fs->umask); + err = mnt_want_write(nd.path.mnt); + if (err) + goto out_mknod_dput; err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0); + mnt_drop_write(nd.path.mnt); if (err) goto out_mknod_dput; mutex_unlock(&nd.path.dentry->d_inode->i_mutex); |