From 0b9ea37f24e247ed69baabf27fb211aa6a3e7622 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 27 Mar 2013 10:15:37 -0400 Subject: nfsd: eliminate one of the DRC cache searches The most common case is to do a search of the cache, followed by an insert. In the case where we have to allocate an entry off the slab, then we end up having to redo the search, which is wasteful. Better optimize the code for the common case by eliminating the initial search of the cache and always preallocating an entry. In the case of a cache hit, we'll end up just freeing that entry but that's preferable to an extra search. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfscache.c | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index ca05f6dc3544..c61391e8e09d 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -318,55 +318,53 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) __wsum csum; unsigned long age; int type = rqstp->rq_cachetype; - int rtn; + int rtn = RC_DOIT; rqstp->rq_cacherep = NULL; if (type == RC_NOCACHE) { nfsdstats.rcnocache++; - return RC_DOIT; + return rtn; } csum = nfsd_cache_csum(rqstp); + /* + * Since the common case is a cache miss followed by an insert, + * preallocate an entry. First, try to reuse the first entry on the LRU + * if it works, then go ahead and prune the LRU list. + */ spin_lock(&cache_lock); - rtn = RC_DOIT; - - rp = nfsd_cache_search(rqstp, csum); - if (rp) - goto found_entry; - - /* Try to use the first entry on the LRU */ if (!list_empty(&lru_head)) { rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru); if (nfsd_cache_entry_expired(rp) || num_drc_entries >= max_drc_entries) { lru_put_end(rp); prune_cache_entries(); - goto setup_entry; + goto search_cache; } } - /* Drop the lock and allocate a new entry */ + /* No expired ones available, allocate a new one. 
*/ spin_unlock(&cache_lock); rp = nfsd_reply_cache_alloc(); - if (!rp) { - dprintk("nfsd: unable to allocate DRC entry!\n"); - return RC_DOIT; - } spin_lock(&cache_lock); - ++num_drc_entries; + if (likely(rp)) + ++num_drc_entries; - /* - * Must search again just in case someone inserted one - * after we dropped the lock above. - */ +search_cache: found = nfsd_cache_search(rqstp, csum); if (found) { - nfsd_reply_cache_free_locked(rp); + if (likely(rp)) + nfsd_reply_cache_free_locked(rp); rp = found; goto found_entry; } + if (!rp) { + dprintk("nfsd: unable to allocate DRC entry!\n"); + goto out; + } + /* * We're keeping the one we just allocated. Are we now over the * limit? Prune one off the tip of the LRU in trade for the one we @@ -376,7 +374,6 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) nfsd_reply_cache_free_locked(list_first_entry(&lru_head, struct svc_cacherep, c_lru)); -setup_entry: nfsdstats.rcmisses++; rqstp->rq_cacherep = rp; rp->c_state = RC_INPROG; -- cgit v1.2.3 From 9dc56143c298692276231735ec6546c1fac596e0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 27 Mar 2013 10:15:37 -0400 Subject: nfsd: break out comparator into separate function Break out the function that compares the rqstp and checksum against a reply cache entry. While we're at it, track the efficacy of the checksum over the NFS data by tracking the cases where we would have incorrectly matched a DRC entry if we had not tracked it or the length. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfscache.c | 46 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index c61391e8e09d..48f5ef944234 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -23,9 +23,21 @@ static struct hlist_head * cache_hash; static struct list_head lru_head; static struct kmem_cache *drc_slab; -static unsigned int num_drc_entries; + +/* max number of entries allowed in the cache */ static unsigned int max_drc_entries; +/* + * Stats and other tracking of on the duplicate reply cache. All of these and + * the "rc" fields in nfsdstats are protected by the cache_lock + */ + +/* total number of entries */ +static unsigned int num_drc_entries; + +/* cache misses due only to checksum comparison failures */ +static unsigned int payload_misses; + /* * Calculate the hash index from an XID. */ @@ -273,6 +285,26 @@ nfsd_cache_csum(struct svc_rqst *rqstp) return csum; } +static bool +nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) +{ + /* Check RPC header info first */ + if (rqstp->rq_xid != rp->c_xid || rqstp->rq_proc != rp->c_proc || + rqstp->rq_prot != rp->c_prot || rqstp->rq_vers != rp->c_vers || + rqstp->rq_arg.len != rp->c_len || + !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) || + rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr)) + return false; + + /* compare checksum of NFS data */ + if (csum != rp->c_csum) { + ++payload_misses; + return false; + } + + return true; +} + /* * Search the request hash for an entry that matches the given rqstp. * Must be called with cache_lock held. 
Returns the found entry or @@ -283,18 +315,10 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) { struct svc_cacherep *rp; struct hlist_head *rh; - __be32 xid = rqstp->rq_xid; - u32 proto = rqstp->rq_prot, - vers = rqstp->rq_vers, - proc = rqstp->rq_proc; - rh = &cache_hash[request_hash(xid)]; + rh = &cache_hash[request_hash(rqstp->rq_xid)]; hlist_for_each_entry(rp, rh, c_hash) { - if (xid == rp->c_xid && proc == rp->c_proc && - proto == rp->c_prot && vers == rp->c_vers && - rqstp->rq_arg.len == rp->c_len && csum == rp->c_csum && - rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) && - rpc_get_port(svc_addr(rqstp)) == rpc_get_port((struct sockaddr *)&rp->c_addr)) + if (nfsd_cache_match(rqstp, csum, rp)) return rp; } return NULL; -- cgit v1.2.3 From 6c6910cd4d0cdb905fbba8c751afd143696930f2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 27 Mar 2013 10:15:38 -0400 Subject: nfsd: track memory utilization by the DRC Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfscache.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 48f5ef944234..1f45b3353bb1 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -38,6 +38,9 @@ static unsigned int num_drc_entries; /* cache misses due only to checksum comparison failures */ static unsigned int payload_misses; +/* amount of memory (in bytes) currently consumed by the DRC */ +static unsigned int drc_mem_usage; + /* * Calculate the hash index from an XID. 
*/ @@ -112,12 +115,15 @@ nfsd_reply_cache_alloc(void) static void nfsd_reply_cache_free_locked(struct svc_cacherep *rp) { - if (rp->c_type == RC_REPLBUFF) + if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { + drc_mem_usage -= rp->c_replvec.iov_len; kfree(rp->c_replvec.iov_base); + } if (!hlist_unhashed(&rp->c_hash)) hlist_del(&rp->c_hash); list_del(&rp->c_lru); --num_drc_entries; + drc_mem_usage -= sizeof(*rp); kmem_cache_free(drc_slab, rp); } @@ -372,8 +378,10 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) spin_unlock(&cache_lock); rp = nfsd_reply_cache_alloc(); spin_lock(&cache_lock); - if (likely(rp)) + if (likely(rp)) { ++num_drc_entries; + drc_mem_usage += sizeof(*rp); + } search_cache: found = nfsd_cache_search(rqstp, csum); @@ -415,6 +423,7 @@ search_cache: /* release any buffer */ if (rp->c_type == RC_REPLBUFF) { + drc_mem_usage -= rp->c_replvec.iov_len; kfree(rp->c_replvec.iov_base); rp->c_replvec.iov_base = NULL; } @@ -483,6 +492,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) struct svc_cacherep *rp = rqstp->rq_cacherep; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; int len; + size_t bufsize = 0; if (!rp) return; @@ -504,19 +514,21 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) break; case RC_REPLBUFF: cachv = &rp->c_replvec; - cachv->iov_base = kmalloc(len << 2, GFP_KERNEL); + bufsize = len << 2; + cachv->iov_base = kmalloc(bufsize, GFP_KERNEL); if (!cachv->iov_base) { nfsd_reply_cache_free(rp); return; } - cachv->iov_len = len << 2; - memcpy(cachv->iov_base, statp, len << 2); + cachv->iov_len = bufsize; + memcpy(cachv->iov_base, statp, bufsize); break; case RC_NOCACHE: nfsd_reply_cache_free(rp); return; } spin_lock(&cache_lock); + drc_mem_usage += bufsize; lru_put_end(rp); rp->c_secure = rqstp->rq_secure; rp->c_type = cachetype; -- cgit v1.2.3 From a2f999a37ebb77e857d3a178bd6f52d1163cd980 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 27 Mar 2013 10:15:38 -0400 Subject: nfsd: 
add new reply_cache_stats file in nfsdfs For presenting statistics relating to duplicate reply cache. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/cache.h | 1 + fs/nfsd/nfscache.c | 25 +++++++++++++++++++++++++ fs/nfsd/nfsctl.c | 9 +++++++++ 3 files changed, 35 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 87fd1410b737..d5c5b3e00266 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -82,6 +82,7 @@ int nfsd_reply_cache_init(void); void nfsd_reply_cache_shutdown(void); int nfsd_cache_lookup(struct svc_rqst *); void nfsd_cache_update(struct svc_rqst *, int, __be32 *); +int nfsd_reply_cache_stats_open(struct inode *, struct file *); #ifdef CONFIG_NFSD_V4 void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp); diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 1f45b3353bb1..fd81ca79a002 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -556,3 +556,28 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) vec->iov_len += data->iov_len; return 1; } + +/* + * Note that fields may be added, removed or reordered in the future. Programs + * scraping this file for info should test the labels to ensure they're + * getting the correct field. 
+ */ +static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) +{ + spin_lock(&cache_lock); + seq_printf(m, "max entries: %u\n", max_drc_entries); + seq_printf(m, "num entries: %u\n", num_drc_entries); + seq_printf(m, "hash buckets: %u\n", HASHSIZE); + seq_printf(m, "mem usage: %u\n", drc_mem_usage); + seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); + seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses); + seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache); + seq_printf(m, "payload misses: %u\n", payload_misses); + spin_unlock(&cache_lock); + return 0; +} + +int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, nfsd_reply_cache_stats_show, NULL); +} diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index f33455b4d957..a830f33df3ef 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -35,6 +35,7 @@ enum { NFSD_Threads, NFSD_Pool_Threads, NFSD_Pool_Stats, + NFSD_Reply_Cache_Stats, NFSD_Versions, NFSD_Ports, NFSD_MaxBlkSize, @@ -212,6 +213,13 @@ static const struct file_operations pool_stats_operations = { .owner = THIS_MODULE, }; +static struct file_operations reply_cache_stats_operations = { + .open = nfsd_reply_cache_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /*----------------------------------------------------------------------------*/ /* * payload - write methods @@ -1047,6 +1055,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO}, + [NFSD_Reply_Cache_Stats] = {"reply_cache_stats", &reply_cache_stats_operations, S_IRUGO}, [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, 
S_IWUSR|S_IRUGO}, -- cgit v1.2.3 From 98d821bda189ba2bfcb3877ea3064da3403698ae Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 27 Mar 2013 10:15:39 -0400 Subject: nfsd: keep stats on worst hash balancing seen so far The typical case with the DRC is a cache miss, so if we keep track of the max number of entries that we've ever walked over in a search, then we should have a reasonable estimate of the longest hash chain that we've ever seen. With that, we'll also keep track of the total size of the cache when we see the longest chain. In the case of a tie, we prefer to track the smallest total cache size in order to properly gauge the worst-case ratio of max vs. avg chain length. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfscache.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index fd81ca79a002..17cb0d6b9944 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -41,6 +41,12 @@ static unsigned int payload_misses; /* amount of memory (in bytes) currently consumed by the DRC */ static unsigned int drc_mem_usage; +/* longest hash chain seen */ +static unsigned int longest_chain; + +/* size of cache when we saw the longest hash chain */ +static unsigned int longest_chain_cachesize; + /* * Calculate the hash index from an XID. 
*/ @@ -319,15 +325,30 @@ nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) static struct svc_cacherep * nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) { - struct svc_cacherep *rp; + struct svc_cacherep *rp, *ret = NULL; struct hlist_head *rh; + unsigned int entries = 0; rh = &cache_hash[request_hash(rqstp->rq_xid)]; hlist_for_each_entry(rp, rh, c_hash) { - if (nfsd_cache_match(rqstp, csum, rp)) - return rp; + ++entries; + if (nfsd_cache_match(rqstp, csum, rp)) { + ret = rp; + break; + } + } + + /* tally hash chain length stats */ + if (entries > longest_chain) { + longest_chain = entries; + longest_chain_cachesize = num_drc_entries; + } else if (entries == longest_chain) { + /* prefer to keep the smallest cachesize possible here */ + longest_chain_cachesize = min(longest_chain_cachesize, + num_drc_entries); } - return NULL; + + return ret; } /* @@ -573,6 +594,8 @@ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses); seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache); seq_printf(m, "payload misses: %u\n", payload_misses); + seq_printf(m, "longest chain len: %u\n", longest_chain); + seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize); spin_unlock(&cache_lock); return 0; } -- cgit v1.2.3 From 0733c7ba1ef0d7e29a11c52f4d1356fc394de334 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 27 Mar 2013 10:15:39 -0400 Subject: nfsd: scale up the number of DRC hash buckets with cache size We've now increased the size of the duplicate reply cache by quite a bit, but the number of hash buckets has not changed. So, we've gone from an average hash chain length of 16 in the old code to 4096 when the cache is its largest. Change the code to scale out the number of buckets with the max size of the cache. At the same time, we also need to fix the hash function since the existing one isn't really suitable when there are more than 256 buckets. 
Move instead to use the stock hash_32 function for this. Testing on a machine that had 2048 buckets showed that this gave a smaller longest:average ratio than the existing hash function: The formula here is longest hash bucket searched divided by average number of entries per bucket at the time that we saw that longest bucket: old hash: 68/(39258/2048) == 3.547404 hash_32: 45/(33773/2048) == 2.728807 Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfscache.c | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 17cb0d6b9944..eb2587745a64 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include "nfsd.h" @@ -18,7 +20,12 @@ #define NFSDDBG_FACILITY NFSDDBG_REPCACHE -#define HASHSIZE 64 +/* + * We use this value to determine the number of hash buckets from the max + * cache size, the idea being that when the cache is at its maximum number + * of entries, then this should be the average number of entries per bucket. + */ +#define TARGET_BUCKET_SIZE 64 static struct hlist_head * cache_hash; static struct list_head lru_head; @@ -27,6 +34,9 @@ static struct kmem_cache *drc_slab; /* max number of entries allowed in the cache */ static unsigned int max_drc_entries; +/* number of significant bits in the hash value */ +static unsigned int maskbits; + /* * Stats and other tracking of on the duplicate reply cache. All of these and * the "rc" fields in nfsdstats are protected by the cache_lock @@ -47,16 +57,6 @@ static unsigned int longest_chain; /* size of cache when we saw the longest hash chain */ static unsigned int longest_chain_cachesize; -/* - * Calculate the hash index from an XID. 
- */ -static inline u32 request_hash(u32 xid) -{ - u32 h = xid; - h ^= (xid >> 24); - return h & (HASHSIZE-1); -} - static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); static void cache_cleaner_func(struct work_struct *unused); static int nfsd_reply_cache_shrink(struct shrinker *shrink, @@ -103,6 +103,16 @@ nfsd_cache_size_limit(void) return min_t(unsigned int, limit, 256*1024); } +/* + * Compute the number of hash buckets we need. Divide the max cachesize by + * the "target" max bucket size, and round up to next power of two. + */ +static unsigned int +nfsd_hashsize(unsigned int limit) +{ + return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE); +} + static struct svc_cacherep * nfsd_reply_cache_alloc(void) { @@ -143,9 +153,13 @@ nfsd_reply_cache_free(struct svc_cacherep *rp) int nfsd_reply_cache_init(void) { + unsigned int hashsize; + INIT_LIST_HEAD(&lru_head); max_drc_entries = nfsd_cache_size_limit(); num_drc_entries = 0; + hashsize = nfsd_hashsize(max_drc_entries); + maskbits = ilog2(hashsize); register_shrinker(&nfsd_reply_cache_shrinker); drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), @@ -153,7 +167,7 @@ int nfsd_reply_cache_init(void) if (!drc_slab) goto out_nomem; - cache_hash = kcalloc(HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); + cache_hash = kcalloc(hashsize, sizeof(struct hlist_head), GFP_KERNEL); if (!cache_hash) goto out_nomem; @@ -204,7 +218,7 @@ static void hash_refile(struct svc_cacherep *rp) { hlist_del_init(&rp->c_hash); - hlist_add_head(&rp->c_hash, cache_hash + request_hash(rp->c_xid)); + hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits)); } static inline bool @@ -329,7 +343,7 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) struct hlist_head *rh; unsigned int entries = 0; - rh = &cache_hash[request_hash(rqstp->rq_xid)]; + rh = &cache_hash[hash_32(rqstp->rq_xid, maskbits)]; hlist_for_each_entry(rp, rh, c_hash) { ++entries; if (nfsd_cache_match(rqstp, csum, rp)) { @@ 
-588,7 +602,7 @@ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) spin_lock(&cache_lock); seq_printf(m, "max entries: %u\n", max_drc_entries); seq_printf(m, "num entries: %u\n", num_drc_entries); - seq_printf(m, "hash buckets: %u\n", HASHSIZE); + seq_printf(m, "hash buckets: %u\n", 1 << maskbits); seq_printf(m, "mem usage: %u\n", drc_mem_usage); seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses); -- cgit v1.2.3 From b600de7ab9288eaf6126561203e0ae340828ab44 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 28 Feb 2013 11:55:46 -0800 Subject: nfsd4: remove BUG_ON This BUG_ON just crashes the thread a little earlier than it would otherwise--it doesn't seem useful. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ae73175e6e68..c7e4e8c28827 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1282,12 +1282,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, if (op->status) goto encode_op; - if (opdesc->op_func) { - if (opdesc->op_get_currentstateid) - opdesc->op_get_currentstateid(cstate, &op->u); - op->status = opdesc->op_func(rqstp, cstate, &op->u); - } else - BUG_ON(op->status == nfs_ok); + if (opdesc->op_get_currentstateid) + opdesc->op_get_currentstateid(cstate, &op->u); + op->status = opdesc->op_func(rqstp, cstate, &op->u); if (!op->status) { if (opdesc->op_set_currentstateid) -- cgit v1.2.3 From 9d313b17db965ae42137c5d4dd3063037544c4cd Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 28 Feb 2013 12:51:49 -0800 Subject: nfsd4: handle seqid-mutating open errors from xdr decoding If a client sets an owner (or group_owner or acl) attribute on open for create, and the mapping of that owner to an id fails, then we return BAD_OWNER. 
But BAD_OWNER is a seqid-mutating error, so we can't shortcut the open processing in that case: we have to at least look up the owner so we can find the seqid to bump. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 27 ++++++++++++++++++++++++++- fs/nfsd/nfs4xdr.c | 1 + fs/nfsd/xdr4.h | 1 + 3 files changed, 28 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index c7e4e8c28827..42c498ce9f0e 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -351,6 +351,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } if (status) goto out; + if (open->op_xdr_error) { + status = open->op_xdr_error; + goto out; + } status = nfsd4_check_open_attributes(rqstp, cstate, open); if (status) @@ -416,6 +420,24 @@ out: return status; } +/* + * OPEN is the only seqid-mutating operation whose decoding can fail + * with a seqid-mutating error (specifically, decoding of user names in + * the attributes). Therefore we have to do some processing to look up + * the stateowner so that we can bump the seqid. + */ +static __be32 nfsd4_open_omfg(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_op *op) +{ + struct nfsd4_open *open = (struct nfsd4_open *)&op->u; + + if (!seqid_mutating_err(ntohl(op->status))) + return op->status; + if (nfsd4_has_session(cstate)) + return op->status; + open->op_xdr_error = op->status; + return nfsd4_open(rqstp, cstate, open); +} + /* * filehandle-manipulating ops. */ @@ -1244,8 +1266,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, * for example, if there is a miscellaneous XDR error * it will be set to nfserr_bad_xdr. 
*/ - if (op->status) + if (op->status) { + if (op->opnum == OP_OPEN) + op->status = nfsd4_open_omfg(rqstp, cstate, op); goto encode_op; + } /* We must be able to encode a successful response to * this operation, with enough room left over to encode a diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a2720071f282..229b3ac246e1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -804,6 +804,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) open->op_iattr.ia_valid = 0; open->op_openowner = NULL; + open->op_xdr_error = 0; /* seqid, share_access, share_deny, clientid, ownerlen */ READ_BUF(4); READ32(open->op_seqid); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 546f8983ecf1..be0a79d1dbcb 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -237,6 +237,7 @@ struct nfsd4_open { u32 op_share_deny; /* request */ u32 op_deleg_want; /* request */ stateid_t op_stateid; /* response */ + __be32 op_xdr_error; /* see nfsd4_open_omfg() */ u32 op_recall; /* recall */ struct nfsd4_change_info op_cinfo; /* response */ u32 op_rflags; /* response */ -- cgit v1.2.3 From b0a9d3ab577464529f6649ec54f8a0de160866e3 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 7 Mar 2013 17:26:18 -0500 Subject: nfsd4: fix race on client shutdown Dropping the session's reference count after the client's means we leave a window where the session's se_client pointer is NULL. 
An xpt_user callback that encounters such a session may then crash: [ 303.956011] BUG: unable to handle kernel NULL pointer dereference at 0000000000000318 [ 303.959061] IP: [] _raw_spin_lock+0x1e/0x40 [ 303.959061] PGD 37811067 PUD 3d498067 PMD 0 [ 303.959061] Oops: 0002 [#8] PREEMPT SMP [ 303.959061] Modules linked in: md5 nfsd auth_rpcgss nfs_acl snd_hda_intel snd_hda_codec snd_hwdep snd_pcm snd_page_alloc microcode psmouse snd_timer serio_raw pcspkr evdev snd soundcore i2c_piix4 i2c_core intel_agp intel_gtt processor button nfs lockd sunrpc fscache ata_generic pata_acpi ata_piix uhci_hcd libata btrfs usbcore usb_common crc32c scsi_mod libcrc32c zlib_deflate floppy virtio_balloon virtio_net virtio_pci virtio_blk virtio_ring virtio [ 303.959061] CPU 0 [ 303.959061] Pid: 264, comm: nfsd Tainted: G D 3.8.0-ARCH+ #156 Bochs Bochs [ 303.959061] RIP: 0010:[] [] _raw_spin_lock+0x1e/0x40 [ 303.959061] RSP: 0018:ffff880037877dd8 EFLAGS: 00010202 [ 303.959061] RAX: 0000000000000100 RBX: ffff880037a2b698 RCX: ffff88003d879278 [ 303.959061] RDX: ffff88003d879278 RSI: dead000000100100 RDI: 0000000000000318 [ 303.959061] RBP: ffff880037877dd8 R08: ffff88003c5a0f00 R09: 0000000000000002 [ 303.959061] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000 [ 303.959061] R13: 0000000000000318 R14: ffff880037a2b680 R15: ffff88003c1cbe00 [ 303.959061] FS: 0000000000000000(0000) GS:ffff88003fc00000(0000) knlGS:0000000000000000 [ 303.959061] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 303.959061] CR2: 0000000000000318 CR3: 000000003d49c000 CR4: 00000000000006f0 [ 303.959061] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 303.959061] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 303.959061] Process nfsd (pid: 264, threadinfo ffff880037876000, task ffff88003c1fd0a0) [ 303.959061] Stack: [ 303.959061] ffff880037877e08 ffffffffa03772ec ffff88003d879000 ffff88003d879278 [ 303.959061] ffff88003d879080 0000000000000000 
ffff880037877e38 ffffffffa0222a1f [ 303.959061] 0000000000107ac0 ffff88003c22e000 ffff88003d879000 ffff88003c1cbe00 [ 303.959061] Call Trace: [ 303.959061] [] nfsd4_conn_lost+0x3c/0xa0 [nfsd] [ 303.959061] [] svc_delete_xprt+0x10f/0x180 [sunrpc] [ 303.959061] [] svc_recv+0xe6/0x580 [sunrpc] [ 303.959061] [] nfsd+0xb5/0x140 [nfsd] [ 303.959061] [] ? nfsd_destroy+0x90/0x90 [nfsd] [ 303.959061] [] kthread+0xc0/0xd0 [ 303.959061] [] ? perf_trace_xen_mmu_set_pte_at+0x50/0x100 [ 303.959061] [] ? kthread_freezable_should_stop+0x70/0x70 [ 303.959061] [] ret_from_fork+0x7c/0xb0 [ 303.959061] [] ? kthread_freezable_should_stop+0x70/0x70 [ 303.959061] Code: ff ff 5d c3 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 55 65 48 8b 04 25 f0 c6 00 00 48 89 e5 83 80 44 e0 ff ff 01 b8 00 01 00 00 <3e> 66 0f c1 07 0f b6 d4 38 c2 74 0f 66 0f 1f 44 00 00 f3 90 0f [ 303.959061] RIP [] _raw_spin_lock+0x1e/0x40 [ 303.959061] RSP [ 303.959061] CR2: 0000000000000318 [ 304.001218] ---[ end trace 2d809cd4a7931f5a ]--- [ 304.001903] note: nfsd[264] exited with preempt_count 2 Reported-by: Bryan Schumaker Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 12 ++++++++---- fs/nfsd/nfs4xdr.c | 1 - fs/nfsd/state.h | 2 -- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2e27430b9070..3e5cbfe8a967 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -864,7 +864,7 @@ static void free_session(struct kref *kref) __free_session(ses); } -void nfsd4_put_session(struct nfsd4_session *ses) +static void nfsd4_put_session(struct nfsd4_session *ses) { struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id); @@ -1057,12 +1057,16 @@ release_session_client(struct nfsd4_session *session) struct nfs4_client *clp = session->se_client; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + nfsd4_put_session(session); if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock)) return; - if (is_client_expired(clp)) { + /* + * At this point we also know all sessions have refcnt 1, + * so free_client will delete them all if necessary: + */ + if (is_client_expired(clp)) free_client(clp); - session->se_client = NULL; - } else + else renew_client_locked(clp); spin_unlock(&nn->client_lock); } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 229b3ac246e1..9b02b6652f2b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3685,7 +3685,6 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo } /* Renew the clientid on success and on replay */ release_session_client(cs->session); - nfsd4_put_session(cs->session); } return 1; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 1a8c7391f7ae..327552bb6dba 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -209,8 +209,6 @@ struct nfsd4_session { struct nfsd4_slot *se_slots[]; /* forward channel slots */ }; -extern void nfsd4_put_session(struct nfsd4_session *ses); - /* formatted contents of nfs4_sessionid */ struct nfsd4_sessionid { clientid_t clientid; -- cgit v1.2.3 From 2e4b7239a62a0c58664bf0cf73aea951b7e046fc 
Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 8 Mar 2013 09:30:43 -0500 Subject: nfsd4: fix use-after-free of 4.1 client on connection loss Once we drop the lock here there's nothing keeping the client around: the only lock still held is the xpt_lock on this socket, but this socket no longer has any connection with the client so there's no way for other code to know we're still using the client. The solution is simple: all nfsd4_probe_callback does is set a few variables and queue some work, so there's no reason we can't just keep it under the lock. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3e5cbfe8a967..baf314a950b8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -761,8 +761,8 @@ static void nfsd4_conn_lost(struct svc_xpt_user *u) list_del(&c->cn_persession); free_conn(c); } - spin_unlock(&clp->cl_lock); nfsd4_probe_callback(clp); + spin_unlock(&clp->cl_lock); } static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags) -- cgit v1.2.3 From 9c6bdbb8dd58c8de8f36e1deb6b768918c85c249 Mon Sep 17 00:00:00 2001 From: Yanchuan Nian Date: Mon, 11 Mar 2013 10:43:26 +0800 Subject: nfsd: remove unused macro in nfsv4 lk_rflags is never used anywhere, and rflags is not defined in struct nfsd4_lock. Signed-off-by: Yanchuan Nian Signed-off-by: J. 
Bruce Fields --- fs/nfsd/xdr4.h | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index be0a79d1dbcb..40e05e6d2518 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -184,7 +184,6 @@ struct nfsd4_lock { #define lk_old_lock_stateid v.old.lock_stateid #define lk_old_lock_seqid v.old.lock_seqid -#define lk_rflags u.ok.rflags #define lk_resp_stateid u.ok.stateid #define lk_denied u.denied -- cgit v1.2.3 From 491402a7876e91aa491c33f70ed4e86e59f06c8b Mon Sep 17 00:00:00 2001 From: "ycnian@gmail.com" Date: Mon, 11 Mar 2013 08:46:14 +0800 Subject: nfsd: fix bug on nfs4 stateid deallocation NFS4_OO_PURGE_CLOSE is not handled properly. To avoid a memory leak, the nfs4 stateid which is pointed to by oo_last_closed_stid is freed in nfsd4_close(), but NFS4_OO_PURGE_CLOSE isn't cleared meanwhile. So the stateid released in THIS close procedure may be freed immediately in the coming encoding function. Sorry that Signed-off-by was forgotten in last version. Signed-off-by: Yanchuan Nian Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index baf314a950b8..aac878ecabc4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3823,6 +3823,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_close_open_stateid(stp); release_last_closed_stateid(oo); + oo->oo_flags &= ~NFS4_OO_PURGE_CLOSE; oo->oo_last_closed_stid = stp; if (list_empty(&oo->oo_owner.so_stateids)) { -- cgit v1.2.3 From 78389046f733564d5c2c94f0b8d6ff0cdae951d9 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 12 Mar 2013 10:12:37 -0400 Subject: nfsd4: warn on odd create_session state This should never happen. (Note: the comparable case in setclientid_confirm *can* happen, since updating a client record can result in both confirmed and unconfirmed records with the same clientid.) Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index aac878ecabc4..ef7c6222b7c8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1788,6 +1788,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, nfs4_lock_state(); unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn); conf = find_confirmed_client(&cr_ses->clientid, true, nn); + WARN_ON_ONCE(conf && unconf); if (conf) { cs_slot = &conf->cl_cs_slot; @@ -2129,6 +2130,7 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta nfs4_lock_state(); unconf = find_unconfirmed_client(&dc->clientid, true, nn); conf = find_confirmed_client(&dc->clientid, true, nn); + WARN_ON_ONCE(conf && unconf); if (conf) { clp = conf; -- cgit v1.2.3 From 0eb6f20aa532b0c16849d627926c2ad3fe2f1cdf Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 12 Mar 2013 17:36:17 -0400 Subject: nfsd4: STALE_STATEID cleanup Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ef7c6222b7c8..a0ab6ad7239d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3282,16 +3282,6 @@ static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *s return nfs_ok; } -static int -STALE_STATEID(stateid_t *stateid, struct nfsd_net *nn) -{ - if (stateid->si_opaque.so_clid.cl_boot == nn->boot_time) - return 0; - dprintk("NFSD: stale stateid " STATEID_FMT "!\n", - STATEID_VAL(stateid)); - return 1; -} - static inline int access_permit_read(struct nfs4_ol_stateid *stp) { @@ -3422,19 +3412,20 @@ static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, struct nfsd_net *nn) { struct nfs4_client *cl; + __be32 status; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return nfserr_bad_stateid; - if (STALE_STATEID(stateid, nn)) + status = lookup_clientid(&stateid->si_opaque.so_clid, sessions, + nn, &cl); + if (status == nfserr_stale_clientid) return nfserr_stale_stateid; - cl = find_confirmed_client(&stateid->si_opaque.so_clid, sessions, nn); - if (!cl) - return nfserr_expired; + if (status) + return status; *s = find_stateid_by_type(cl, stateid, typemask); if (!*s) return nfserr_bad_stateid; return nfs_ok; - } /* -- cgit v1.2.3 From 1ca507920db36aea8b81fe1443f96a1a6a43318f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 14 Mar 2013 18:12:03 -0400 Subject: nfsd4: remove some dprintk's E.g. printk's that just report the return value from an op are uninteresting as we already do that in the main proc_compound loop. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a0ab6ad7239d..84dfbdfd2d2c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1843,15 +1843,13 @@ nfsd4_create_session(struct svc_rqst *rqstp, /* cache solo and embedded create sessions under the state lock */ nfsd4_cache_create_session(cr_ses, cs_slot, status); nfs4_unlock_state(); -out: - dprintk("%s returns %d\n", __func__, ntohl(status)); return status; out_free_conn: nfs4_unlock_state(); free_conn(conn); out_free_session: __free_session(new); - goto out; + return status; } static __be32 nfsd4_map_bcts_dir(u32 *dir) @@ -1963,7 +1961,6 @@ nfsd4_destroy_session(struct svc_rqst *r, spin_unlock(&nn->client_lock); status = nfs_ok; out: - dprintk("%s returns %d\n", __func__, ntohl(status)); return status; } @@ -2116,7 +2113,6 @@ out: } kfree(conn); spin_unlock(&nn->client_lock); - dprintk("%s: return %d\n", __func__, ntohl(status)); return status; } @@ -2155,7 +2151,6 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta expire_client(clp); out: nfs4_unlock_state(); - dprintk("%s return %d\n", __func__, ntohl(status)); return status; } @@ -2532,8 +2527,6 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) struct nfs4_ol_stateid *stp; __be32 ret; - dprintk("NFSD: nfs4_share_conflict\n"); - fp = find_file(ino); if (!fp) return nfs_ok; -- cgit v1.2.3 From c0293b0131a8d582af85023c684786f7536f0767 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 14 Mar 2013 18:20:01 -0400 Subject: nfsd4: destroy_clientid simplification I'm not sure what the check for clientid expiry was meant to do here. The check for a matching session is redundant given the previous check for state: a client without state is, in particular, a client without sessions. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 84dfbdfd2d2c..905a5b511047 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2131,13 +2131,7 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta if (conf) { clp = conf; - if (!is_client_expired(conf) && client_has_state(conf)) { - status = nfserr_clientid_busy; - goto out; - } - - /* rfc5661 18.50.3 */ - if (cstate->session && conf == cstate->session->se_client) { + if (client_has_state(conf)) { status = nfserr_clientid_busy; goto out; } -- cgit v1.2.3 From bfa85e83a87aec71cbb231256eaad7341aa8b4fa Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 14 Mar 2013 18:24:52 -0400 Subject: nfsd4: clientid lookup cleanup Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 905a5b511047..c89bb3c40a0b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1380,12 +1380,12 @@ move_to_confirmed(struct nfs4_client *clp) } static struct nfs4_client * -find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) +find_client_in_id_table(struct list_head *tbl, clientid_t *clid, bool sessions) { struct nfs4_client *clp; unsigned int idhashval = clientid_hashval(clid->cl_id); - list_for_each_entry(clp, &nn->conf_id_hashtbl[idhashval], cl_idhash) { + list_for_each_entry(clp, &tbl[idhashval], cl_idhash) { if (same_clid(&clp->cl_clientid, clid)) { if ((bool)clp->cl_minorversion != sessions) return NULL; @@ -1396,20 +1396,20 @@ find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) return NULL; } +static struct nfs4_client * +find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) +{ + struct list_head *tbl = nn->conf_id_hashtbl; + + return 
find_client_in_id_table(tbl, clid, sessions); +} + static struct nfs4_client * find_unconfirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) { - struct nfs4_client *clp; - unsigned int idhashval = clientid_hashval(clid->cl_id); + struct list_head *tbl = nn->unconf_id_hashtbl; - list_for_each_entry(clp, &nn->unconf_id_hashtbl[idhashval], cl_idhash) { - if (same_clid(&clp->cl_clientid, clid)) { - if ((bool)clp->cl_minorversion != sessions) - return NULL; - return clp; - } - } - return NULL; + return find_client_in_id_table(tbl, clid, sessions); } static bool clp_used_exchangeid(struct nfs4_client *clp) -- cgit v1.2.3 From abcdff09a05117112aa22cd84939039655bca710 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 14 Mar 2013 19:55:33 -0400 Subject: nfsd4: fix destroy_session race destroy_session uses the session and client without continuously holding any reference or locks. Put the whole thing under the state lock for now. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c89bb3c40a0b..8cc668dc4997 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1926,41 +1926,35 @@ nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_destroy_session *sessionid) { struct nfsd4_session *ses; - __be32 status = nfserr_badsession; + __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id); - /* Notes: - * - The confirmed nfs4_client->cl_sessionid holds destroyed sessinid - * - Should we return nfserr_back_chan_busy if waiting for - * callbacks on to-be-destroyed session? - * - Do we need to clear any callback info from previous session? 
- */ - + nfs4_lock_state(); + status = nfserr_not_only_op; if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { if (!nfsd4_last_compound_op(r)) - return nfserr_not_only_op; + goto out; } dump_sessionid(__func__, &sessionid->sessionid); spin_lock(&nn->client_lock); ses = find_in_sessionid_hashtbl(&sessionid->sessionid, SVC_NET(r)); - if (!ses) { - spin_unlock(&nn->client_lock); - goto out; - } + status = nfserr_badsession; + if (!ses) + goto out_client_lock; unhash_session(ses); spin_unlock(&nn->client_lock); - nfs4_lock_state(); nfsd4_probe_callback_sync(ses->se_client); - nfs4_unlock_state(); spin_lock(&nn->client_lock); nfsd4_del_conns(ses); nfsd4_put_session_locked(ses); - spin_unlock(&nn->client_lock); status = nfs_ok; +out_client_lock: + spin_unlock(&nn->client_lock); out: + nfs4_unlock_state(); return status; } -- cgit v1.2.3 From 4f6e6c17733ecf01c05a693ced8349ccf8101fd8 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 18 Mar 2013 17:31:30 -0400 Subject: nfsd4: simplify bind_conn_to_session locking The locking here is very fiddly, and there's no reason for us to be setting cstate->session, since this is the only op in the compound. Let's just take the state lock and drop the reference counting. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8cc668dc4997..8e83cef4d0bd 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1887,30 +1887,30 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, { __be32 status; struct nfsd4_conn *conn; + struct nfsd4_session *session; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (!nfsd4_last_compound_op(rqstp)) return nfserr_not_only_op; + nfs4_lock_state(); spin_lock(&nn->client_lock); - cstate->session = find_in_sessionid_hashtbl(&bcts->sessionid, SVC_NET(rqstp)); - /* Sorta weird: we only need the refcnt'ing because new_conn acquires - * client_lock iself: */ - if (cstate->session) { - nfsd4_get_session(cstate->session); - atomic_inc(&cstate->session->se_client->cl_refcount); - } + session = find_in_sessionid_hashtbl(&bcts->sessionid, SVC_NET(rqstp)); spin_unlock(&nn->client_lock); - if (!cstate->session) - return nfserr_badsession; - + status = nfserr_badsession; + if (!session) + goto out; status = nfsd4_map_bcts_dir(&bcts->dir); if (status) - return status; + goto out; conn = alloc_conn(rqstp, bcts->dir); + status = nfserr_jukebox; if (!conn) - return nfserr_jukebox; - nfsd4_init_conn(rqstp, conn, cstate->session); - return nfs_ok; + goto out; + nfsd4_init_conn(rqstp, conn, session); + status = nfs_ok; +out: + nfs4_unlock_state(); + return status; } static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) -- cgit v1.2.3 From 221a68766973d7a3afe40a05abd8258b5de016a0 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 1 Apr 2013 22:23:49 -0400 Subject: nfsd4: don't destroy in-use clients When a setclientid_confirm or create_session confirms a client after a client reboot, it also destroys any previous state held by that client. 
The shutdown of that previous state must be careful not to free the client out from under threads processing other requests that refer to the client. This is a particular problem in the NFSv4.1 case when we hold a reference to a session (hence a client) throughout compound processing. The server attempts to handle this by unhashing the client at the time it's destroyed, then delaying the final free to the end. But this still leaves some races in the current code. I believe it's simpler just to fail the attempt to destroy the client by returning NFS4ERR_DELAY. This is a case that should never happen anyway. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 209 +++++++++++++++++++++++++++++++--------------------- fs/nfsd/nfs4xdr.c | 3 +- fs/nfsd/state.h | 16 +--- 3 files changed, 131 insertions(+), 97 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8e83cef4d0bd..3b4ce41c9db8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -113,6 +113,90 @@ nfs4_unlock_state(void) mutex_unlock(&client_mutex); } +static bool is_client_expired(struct nfs4_client *clp) +{ + return clp->cl_time == 0; +} + +static __be32 mark_client_expired_locked(struct nfs4_client *clp) +{ + if (atomic_read(&clp->cl_refcount)) + return nfserr_jukebox; + clp->cl_time = 0; + return nfs_ok; +} + +static __be32 mark_client_expired(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + __be32 ret; + + spin_lock(&nn->client_lock); + ret = mark_client_expired_locked(clp); + spin_unlock(&nn->client_lock); + return ret; +} + +static __be32 get_client_locked(struct nfs4_client *clp) +{ + if (is_client_expired(clp)) + return nfserr_expired; + atomic_inc(&clp->cl_refcount); + return nfs_ok; +} + +/* must be called under the client_lock */ +static inline void +renew_client_locked(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (is_client_expired(clp)) { + WARN_ON(1); + 
printk("%s: client (clientid %08x/%08x) already expired\n", + __func__, + clp->cl_clientid.cl_boot, + clp->cl_clientid.cl_id); + return; + } + + dprintk("renewing client (clientid %08x/%08x)\n", + clp->cl_clientid.cl_boot, + clp->cl_clientid.cl_id); + list_move_tail(&clp->cl_lru, &nn->client_lru); + clp->cl_time = get_seconds(); +} + +static inline void +renew_client(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + spin_lock(&nn->client_lock); + renew_client_locked(clp); + spin_unlock(&nn->client_lock); +} + +void put_client_renew_locked(struct nfs4_client *clp) +{ + if (!atomic_dec_and_test(&clp->cl_refcount)) + return; + if (!is_client_expired(clp)) + renew_client_locked(clp); +} + +void put_client_renew(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock)) + return; + if (!is_client_expired(clp)) + renew_client_locked(clp); + spin_unlock(&nn->client_lock); +} + + static inline u32 opaque_hashval(const void *ptr, int nbytes) { @@ -864,7 +948,7 @@ static void free_session(struct kref *kref) __free_session(ses); } -static void nfsd4_put_session(struct nfsd4_session *ses) +void nfsd4_put_session(struct nfsd4_session *ses) { struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id); @@ -968,38 +1052,6 @@ unhash_session(struct nfsd4_session *ses) spin_unlock(&ses->se_client->cl_lock); } -/* must be called under the client_lock */ -static inline void -renew_client_locked(struct nfs4_client *clp) -{ - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - - if (is_client_expired(clp)) { - WARN_ON(1); - printk("%s: client (clientid %08x/%08x) already expired\n", - __func__, - clp->cl_clientid.cl_boot, - clp->cl_clientid.cl_id); - return; - } - - dprintk("renewing client (clientid %08x/%08x)\n", - clp->cl_clientid.cl_boot, - clp->cl_clientid.cl_id); - list_move_tail(&clp->cl_lru, &nn->client_lru); - clp->cl_time = 
get_seconds(); -} - -static inline void -renew_client(struct nfs4_client *clp) -{ - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - - spin_lock(&nn->client_lock); - renew_client_locked(clp); - spin_unlock(&nn->client_lock); -} - /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */ static int STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) @@ -1051,33 +1103,12 @@ free_client(struct nfs4_client *clp) kfree(clp); } -void -release_session_client(struct nfsd4_session *session) -{ - struct nfs4_client *clp = session->se_client; - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - - nfsd4_put_session(session); - if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock)) - return; - /* - * At this point we also know all sessions have refcnt 1, - * so free_client will delete them all if necessary: - */ - if (is_client_expired(clp)) - free_client(clp); - else - renew_client_locked(clp); - spin_unlock(&nn->client_lock); -} - /* must be called under the client_lock */ static inline void unhash_client_locked(struct nfs4_client *clp) { struct nfsd4_session *ses; - mark_client_expired(clp); list_del(&clp->cl_lru); spin_lock(&clp->cl_lock); list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) @@ -1119,8 +1150,8 @@ destroy_client(struct nfs4_client *clp) rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); spin_lock(&nn->client_lock); unhash_client_locked(clp); - if (atomic_read(&clp->cl_refcount) == 0) - free_client(clp); + WARN_ON_ONCE(atomic_read(&clp->cl_refcount)); + free_client(clp); spin_unlock(&nn->client_lock); } @@ -1815,8 +1846,12 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out_free_conn; } old = find_confirmed_client_by_name(&unconf->cl_name, nn); - if (old) + if (old) { + status = mark_client_expired(old); + if (status) + goto out_free_conn; expire_client(old); + } move_to_confirmed(unconf); conf = unconf; } else { @@ -2014,6 +2049,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, { struct nfsd4_compoundres *resp = 
rqstp->rq_resp; struct nfsd4_session *session; + struct nfs4_client *clp; struct nfsd4_slot *slot; struct nfsd4_conn *conn; __be32 status; @@ -2034,19 +2070,23 @@ nfsd4_sequence(struct svc_rqst *rqstp, status = nfserr_badsession; session = find_in_sessionid_hashtbl(&seq->sessionid, SVC_NET(rqstp)); if (!session) - goto out; + goto out_no_session; + clp = session->se_client; + status = get_client_locked(clp); + if (status) + goto out_no_session; status = nfserr_too_many_ops; if (nfsd4_session_too_many_ops(rqstp, session)) - goto out; + goto out_put_client; status = nfserr_req_too_big; if (nfsd4_request_too_big(rqstp, session)) - goto out; + goto out_put_client; status = nfserr_badslot; if (seq->slotid >= session->se_fchannel.maxreqs) - goto out; + goto out_put_client; slot = session->se_slots[seq->slotid]; dprintk("%s: slotid %d\n", __func__, seq->slotid); @@ -2061,7 +2101,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (status == nfserr_replay_cache) { status = nfserr_seq_misordered; if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED)) - goto out; + goto out_put_client; cstate->slot = slot; cstate->session = session; /* Return the cached reply status and set cstate->status @@ -2071,7 +2111,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, goto out; } if (status) - goto out; + goto out_put_client; nfsd4_sequence_check_conn(conn, session); conn = NULL; @@ -2088,26 +2128,24 @@ nfsd4_sequence(struct svc_rqst *rqstp, cstate->session = session; out: - /* Hold a session reference until done processing the compound. 
*/ - if (cstate->session) { - struct nfs4_client *clp = session->se_client; - - nfsd4_get_session(cstate->session); - atomic_inc(&clp->cl_refcount); - switch (clp->cl_cb_state) { - case NFSD4_CB_DOWN: - seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; - break; - case NFSD4_CB_FAULT: - seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT; - break; - default: - seq->status_flags = 0; - } + nfsd4_get_session(cstate->session); + switch (clp->cl_cb_state) { + case NFSD4_CB_DOWN: + seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; + break; + case NFSD4_CB_FAULT: + seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT; + break; + default: + seq->status_flags = 0; } +out_no_session: kfree(conn); spin_unlock(&nn->client_lock); return status; +out_put_client: + put_client_renew_locked(clp); + goto out_no_session; } __be32 @@ -2276,8 +2314,12 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, expire_client(unconf); } else { /* case 3: normal case; new or rebooted client */ conf = find_confirmed_client_by_name(&unconf->cl_name, nn); - if (conf) + if (conf) { + status = mark_client_expired(conf); + if (status) + goto out; expire_client(conf); + } move_to_confirmed(unconf); nfsd4_probe_callback(unconf); } @@ -3189,13 +3231,12 @@ nfs4_laundromat(struct nfsd_net *nn) clientid_val = t; break; } - if (atomic_read(&clp->cl_refcount)) { + if (mark_client_expired_locked(clp)) { dprintk("NFSD: client in use (clientid %08x)\n", clp->cl_clientid.cl_id); continue; } - unhash_client_locked(clp); - list_add(&clp->cl_lru, &reaplist); + list_move(&clp->cl_lru, &reaplist); } spin_unlock(&nn->client_lock); list_for_each_safe(pos, next, &reaplist) { @@ -4581,6 +4622,8 @@ nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn) u64 nfsd_forget_client(struct nfs4_client *clp, u64 max) { + if (mark_client_expired(clp)) + return 0; expire_client(clp); return 1; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 9b02b6652f2b..700de0192834 100644 --- a/fs/nfsd/nfs4xdr.c +++ 
b/fs/nfsd/nfs4xdr.c @@ -3684,7 +3684,8 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; } /* Renew the clientid on success and on replay */ - release_session_client(cs->session); + put_client_renew(cs->session->se_client); + nfsd4_put_session(cs->session); } return 1; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 327552bb6dba..07f8a822a6ce 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -209,6 +209,8 @@ struct nfsd4_session { struct nfsd4_slot *se_slots[]; /* forward channel slots */ }; +extern void nfsd4_put_session(struct nfsd4_session *ses); + /* formatted contents of nfs4_sessionid */ struct nfsd4_sessionid { clientid_t clientid; @@ -284,18 +286,6 @@ struct nfs4_client { struct net *net; }; -static inline void -mark_client_expired(struct nfs4_client *clp) -{ - clp->cl_time = 0; -} - -static inline bool -is_client_expired(struct nfs4_client *clp) -{ - return clp->cl_time == 0; -} - /* struct nfs4_client_reset * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl * upon lease reset, or from upcall to state_daemon (to read in state @@ -484,7 +474,7 @@ extern void nfs4_put_delegation(struct nfs4_delegation *dp); extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn); extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); -extern void release_session_client(struct nfsd4_session *); +extern void put_client_renew(struct nfs4_client *clp); extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *); /* nfs4recover operations */ -- cgit v1.2.3 From 66b2b9b2b0e8a9034806293a436628400a44a71d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 19 Mar 2013 12:05:39 -0400 Subject: nfsd4: don't destroy in-use session This changes session destruction to be similar to client destruction in that attempts to destroy a session while in use (which should be rare corner cases) result in DELAY. 
This simplifies things somewhat and helps meet a coming 4.2 requirement. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 72 +++++++++++++++++++++++++++++------------------------ fs/nfsd/state.h | 4 ++- 2 files changed, 43 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3b4ce41c9db8..2fd015587167 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -94,17 +94,32 @@ nfs4_lock_state(void) mutex_lock(&client_mutex); } -static void free_session(struct kref *); +static void free_session(struct nfsd4_session *); -/* Must be called under the client_lock */ -static void nfsd4_put_session_locked(struct nfsd4_session *ses) +void nfsd4_put_session(struct nfsd4_session *ses) +{ + atomic_dec(&ses->se_ref); +} + +static bool is_session_dead(struct nfsd4_session *ses) { - kref_put(&ses->se_ref, free_session); + return ses->se_flags & NFS4_SESSION_DEAD; +} + +static __be32 mark_session_dead_locked(struct nfsd4_session *ses) +{ + if (atomic_read(&ses->se_ref)) + return nfserr_jukebox; + ses->se_flags |= NFS4_SESSION_DEAD; + return nfs_ok; } -static void nfsd4_get_session(struct nfsd4_session *ses) +static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses) { - kref_get(&ses->se_ref); + if (is_session_dead(ses)) + return nfserr_badsession; + atomic_inc(&ses->se_ref); + return nfs_ok; } void @@ -935,28 +950,15 @@ static void __free_session(struct nfsd4_session *ses) kfree(ses); } -static void free_session(struct kref *kref) +static void free_session(struct nfsd4_session *ses) { - struct nfsd4_session *ses; - struct nfsd_net *nn; - - ses = container_of(kref, struct nfsd4_session, se_ref); - nn = net_generic(ses->se_client->net, nfsd_net_id); + struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id); lockdep_assert_held(&nn->client_lock); nfsd4_del_conns(ses); __free_session(ses); } -void nfsd4_put_session(struct nfsd4_session *ses) -{ - struct nfsd_net *nn = 
net_generic(ses->se_client->net, nfsd_net_id); - - spin_lock(&nn->client_lock); - nfsd4_put_session_locked(ses); - spin_unlock(&nn->client_lock); -} - static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan, struct nfsd_net *nn) { @@ -997,7 +999,7 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru new->se_flags = cses->flags; new->se_cb_prog = cses->callback_prog; new->se_cb_sec = cses->cb_sec; - kref_init(&new->se_ref); + atomic_set(&new->se_ref, 0); idx = hash_sessionid(&new->se_sessionid); spin_lock(&nn->client_lock); list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]); @@ -1095,7 +1097,8 @@ free_client(struct nfs4_client *clp) ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, se_perclnt); list_del(&ses->se_perclnt); - nfsd4_put_session_locked(ses); + WARN_ON_ONCE(atomic_read(&ses->se_ref)); + free_session(ses); } free_svc_cred(&clp->cl_cred); kfree(clp->cl_name.data); @@ -1976,15 +1979,16 @@ nfsd4_destroy_session(struct svc_rqst *r, status = nfserr_badsession; if (!ses) goto out_client_lock; - + status = mark_session_dead_locked(ses); + if (status) + goto out_client_lock; unhash_session(ses); spin_unlock(&nn->client_lock); nfsd4_probe_callback_sync(ses->se_client); spin_lock(&nn->client_lock); - nfsd4_del_conns(ses); - nfsd4_put_session_locked(ses); + free_session(ses); status = nfs_ok; out_client_lock: spin_unlock(&nn->client_lock); @@ -2075,18 +2079,21 @@ nfsd4_sequence(struct svc_rqst *rqstp, status = get_client_locked(clp); if (status) goto out_no_session; + status = nfsd4_get_session_locked(session); + if (status) + goto out_put_client; status = nfserr_too_many_ops; if (nfsd4_session_too_many_ops(rqstp, session)) - goto out_put_client; + goto out_put_session; status = nfserr_req_too_big; if (nfsd4_request_too_big(rqstp, session)) - goto out_put_client; + goto out_put_session; status = nfserr_badslot; if (seq->slotid >= session->se_fchannel.maxreqs) - goto out_put_client; + goto 
out_put_session; slot = session->se_slots[seq->slotid]; dprintk("%s: slotid %d\n", __func__, seq->slotid); @@ -2101,7 +2108,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (status == nfserr_replay_cache) { status = nfserr_seq_misordered; if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED)) - goto out_put_client; + goto out_put_session; cstate->slot = slot; cstate->session = session; /* Return the cached reply status and set cstate->status @@ -2111,7 +2118,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, goto out; } if (status) - goto out_put_client; + goto out_put_session; nfsd4_sequence_check_conn(conn, session); conn = NULL; @@ -2128,7 +2135,6 @@ nfsd4_sequence(struct svc_rqst *rqstp, cstate->session = session; out: - nfsd4_get_session(cstate->session); switch (clp->cl_cb_state) { case NFSD4_CB_DOWN: seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; @@ -2143,6 +2149,8 @@ out_no_session: kfree(conn); spin_unlock(&nn->client_lock); return status; +out_put_session: + nfsd4_put_session(session); out_put_client: put_client_renew_locked(clp); goto out_no_session; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 07f8a822a6ce..f6ae4db3efdb 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -194,9 +194,11 @@ struct nfsd4_conn { }; struct nfsd4_session { - struct kref se_ref; + atomic_t se_ref; struct list_head se_hash; /* hash by sessionid */ struct list_head se_perclnt; +/* See SESSION4_PERSIST, etc. for standard flags; this is internal-only: */ +#define NFS4_SESSION_DEAD 0x010 u32 se_flags; struct nfs4_client *se_client; struct nfs4_sessionid se_sessionid; -- cgit v1.2.3 From 89876f8c0dbcc2947b13b9e22cf28c5308cee3c1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 2 Apr 2013 09:01:59 -0400 Subject: nfsd: convert the file_hashtbl to a hlist We only ever traverse the hash chains in the forward direction, so a double pointer list head isn't really necessary. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 14 ++++---------- fs/nfsd/state.h | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2fd015587167..7293e298aeed 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -236,7 +236,7 @@ static inline void put_nfs4_file(struct nfs4_file *fi) { if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) { - list_del(&fi->fi_hash); + hlist_del(&fi->fi_hash); spin_unlock(&recall_lock); iput(fi->fi_inode); nfsd4_free_file(fi); @@ -280,7 +280,7 @@ static unsigned int file_hashval(struct inode *ino) return hash_ptr(ino, FILE_HASH_BITS); } -static struct list_head file_hashtbl[FILE_HASH_SIZE]; +static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) { @@ -2347,7 +2347,6 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) unsigned int hashval = file_hashval(ino); atomic_set(&fp->fi_ref, 1); - INIT_LIST_HEAD(&fp->fi_hash); INIT_LIST_HEAD(&fp->fi_stateids); INIT_LIST_HEAD(&fp->fi_delegations); fp->fi_inode = igrab(ino); @@ -2356,7 +2355,7 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); memset(fp->fi_access, 0, sizeof(fp->fi_access)); spin_lock(&recall_lock); - list_add(&fp->fi_hash, &file_hashtbl[hashval]); + hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]); spin_unlock(&recall_lock); } @@ -2542,7 +2541,7 @@ find_file(struct inode *ino) struct nfs4_file *fp; spin_lock(&recall_lock); - list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { + hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { if (fp->fi_inode == ino) { get_nfs4_file(fp); spin_unlock(&recall_lock); @@ -4810,11 +4809,6 @@ struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_ void nfs4_state_init(void) { - int i; - - for (i = 0; i < FILE_HASH_SIZE; i++) { - INIT_LIST_HEAD(&file_hashtbl[i]); - } 
INIT_LIST_HEAD(&del_recall_lru); } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index f6ae4db3efdb..7674bc806200 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -380,7 +380,7 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so) /* nfs4_file: a file opened by some number of (open) nfs4_stateowners. */ struct nfs4_file { atomic_t fi_ref; - struct list_head fi_hash; /* hash by "struct inode *" */ + struct hlist_node fi_hash; /* hash by "struct inode *" */ struct list_head fi_stateids; struct list_head fi_delegations; /* One each for O_RDONLY, O_WRONLY, O_RDWR: */ -- cgit v1.2.3 From b022032e195ffca83d7002d6b84297d796ed443b Mon Sep 17 00:00:00 2001 From: fanchaoting Date: Mon, 1 Apr 2013 21:07:22 +0800 Subject: nfsd: don't run get_file if nfs4_preprocess_stateid_op return error we should return error status directly when nfs4_preprocess_stateid_op return error. Signed-off-by: fanchaoting Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 42c498ce9f0e..a9b707b23858 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -953,14 +953,14 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, stateid, WR_STATE, &filp); - if (filp) - get_file(filp); - nfs4_unlock_state(); - if (status) { + nfs4_unlock_state(); dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); return status; } + if (filp) + get_file(filp); + nfs4_unlock_state(); cnt = write->wr_buflen; write->wr_how_written = write->wr_stable_how; -- cgit v1.2.3 From ff7c4b3693cbc7e938f49ed89e2f649a33f03ed1 Mon Sep 17 00:00:00 2001 From: fanchaoting Date: Wed, 27 Mar 2013 16:31:18 +0800 Subject: nfsd: remove /proc/fs/nfs when create /proc/fs/nfs/exports error when create /proc/fs/nfs/exports error, we should remove 
/proc/fs/nfs; if we don't, it may cause a memory leak. Signed-off-by: fanchaoting Reviewed-by: chendt.fnst Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index a830f33df3ef..68a4d320cd14 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1111,8 +1111,10 @@ static int create_proc_exports_entry(void) return -ENOMEM; entry = proc_create("exports", 0, entry, &exports_proc_operations); - if (!entry) + if (!entry) { + remove_proc_entry("fs/nfs", NULL); return -ENOMEM; + } return 0; } #else /* CONFIG_PROC_FS */ -- cgit v1.2.3 From 8be2d2344cc192c20d7b2aa3211a5b74082e47d4 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 21 Mar 2013 10:59:29 -0400 Subject: nfsd4: minor cb_recall simplification Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 99bc85ff0217..be3ff0f3ff68 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -817,8 +817,7 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task) static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) { struct nfsd4_callback *cb = calldata; - struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); - struct nfs4_client *clp = dp->dl_stid.sc_client; + struct nfs4_client *clp = cb->cb_clp; u32 minorversion = clp->cl_minorversion; cb->cb_minorversion = minorversion; @@ -839,8 +838,7 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) static void nfsd4_cb_done(struct rpc_task *task, void *calldata) { struct nfsd4_callback *cb = calldata; - struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); - struct nfs4_client *clp = dp->dl_stid.sc_client; + struct nfs4_client *clp = cb->cb_clp; dprintk("%s: minorversion=%d\n", 
__func__, clp->cl_minorversion); @@ -863,7 +861,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) { struct nfsd4_callback *cb = calldata; struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); - struct nfs4_client *clp = dp->dl_stid.sc_client; + struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *current_rpc_client = clp->cl_cb_client; nfsd4_cb_done(task, calldata); -- cgit v1.2.3 From 68a3396178e6688ad7367202cdf0af8ed03c8727 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 21 Mar 2013 11:21:50 -0400 Subject: nfsd4: shut down more of delegation earlier Once we've unhashed the delegation, it's only hanging around for the benefit of an outstanding recall, which only needs the encoded filehandle, stateid, and dl_retries counter. No point keeping the file around any longer, or keeping it hashed. This also fixes a race: calls to idr_remove should really be serialized by the caller, but the nfs4_put_delegation call from the callback code isn't taking the state lock. (Better might be to cancel the callback before destroying the delegation, and remove any need for reference counting--but I don't see an easy way to cancel an rpc call.) Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7293e298aeed..26a03fa6840a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -417,21 +417,18 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv return dp; } -static void free_stid(struct nfs4_stid *s, struct kmem_cache *slab) +static void remove_stid(struct nfs4_stid *s) { struct idr *stateids = &s->sc_client->cl_stateids; idr_remove(stateids, s->sc_stateid.si_opaque.so_id); - kmem_cache_free(slab, s); } void nfs4_put_delegation(struct nfs4_delegation *dp) { if (atomic_dec_and_test(&dp->dl_count)) { - dprintk("NFSD: freeing dp %p\n",dp); - put_nfs4_file(dp->dl_file); - free_stid(&dp->dl_stid, deleg_slab); + kmem_cache_free(deleg_slab, dp); num_delegations--; } } @@ -462,6 +459,9 @@ unhash_delegation(struct nfs4_delegation *dp) list_del_init(&dp->dl_recall_lru); spin_unlock(&recall_lock); nfs4_put_deleg_lease(dp->dl_file); + put_nfs4_file(dp->dl_file); + dp->dl_file = NULL; + remove_stid(&dp->dl_stid); nfs4_put_delegation(dp); } @@ -605,7 +605,8 @@ static void close_generic_stateid(struct nfs4_ol_stateid *stp) static void free_generic_stateid(struct nfs4_ol_stateid *stp) { - free_stid(&stp->st_stid, stateid_slab); + remove_stid(&stp->st_stid); + kmem_cache_free(stateid_slab, stp); } static void release_lock_stateid(struct nfs4_ol_stateid *stp) -- cgit v1.2.3 From e8c69d17d1ef8437aee729322db005573a467fd6 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 21 Mar 2013 15:19:33 -0400 Subject: nfsd4: make del_recall_lru per-network-namespace If nothing else this simplifies the nfs4_state_shutdown_net logic a tad. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/netns.h | 1 + fs/nfsd/nfs4state.c | 15 +++++++-------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 1051bebff1b0..849a7c3ced22 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -80,6 +80,7 @@ struct nfsd_net { */ struct list_head client_lru; struct list_head close_lru; + struct list_head del_recall_lru; struct delayed_work laundromat_work; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 26a03fa6840a..aae93045ce6b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -225,8 +225,6 @@ opaque_hashval(const void *ptr, int nbytes) return x; } -static struct list_head del_recall_lru; - static void nfsd4_free_file(struct nfs4_file *f) { kmem_cache_free(file_slab, f); @@ -2583,6 +2581,9 @@ out: static void nfsd_break_one_deleg(struct nfs4_delegation *dp) { + struct nfs4_client *clp = dp->dl_stid.sc_client; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + /* We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease * callback (and since the lease code is serialized by the kernel @@ -2590,7 +2591,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) * it's safe to take a reference: */ atomic_inc(&dp->dl_count); - list_add_tail(&dp->dl_recall_lru, &del_recall_lru); + list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); /* only place dl_time is set. 
protected by lock_flocks*/ dp->dl_time = get_seconds(); @@ -3254,7 +3255,7 @@ nfs4_laundromat(struct nfsd_net *nn) expire_client(clp); } spin_lock(&recall_lock); - list_for_each_safe(pos, next, &del_recall_lru) { + list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn) continue; @@ -4810,7 +4811,6 @@ struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_ void nfs4_state_init(void) { - INIT_LIST_HEAD(&del_recall_lru); } /* @@ -4874,6 +4874,7 @@ static int nfs4_state_create_net(struct net *net) nn->unconf_name_tree = RB_ROOT; INIT_LIST_HEAD(&nn->client_lru); INIT_LIST_HEAD(&nn->close_lru); + INIT_LIST_HEAD(&nn->del_recall_lru); spin_lock_init(&nn->client_lock); INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); @@ -4986,10 +4987,8 @@ nfs4_state_shutdown_net(struct net *net) INIT_LIST_HEAD(&reaplist); spin_lock(&recall_lock); - list_for_each_safe(pos, next, &del_recall_lru) { + list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - if (dp->dl_stid.sc_client->net != net) - continue; list_move(&dp->dl_recall_lru, &reaplist); } spin_unlock(&recall_lock); -- cgit v1.2.3 From 41d22663cb6a4108091c050cba3c470a3e175dd9 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 21 Mar 2013 15:49:47 -0400 Subject: nfsd4: remove unused nfs4_check_deleg argument Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index aae93045ce6b..795b24d82d18 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2737,7 +2737,7 @@ static bool nfsd4_is_deleg_cur(struct nfsd4_open *open) } static __be32 -nfs4_check_deleg(struct nfs4_client *cl, struct nfs4_file *fp, struct nfsd4_open *open, +nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open, struct nfs4_delegation **dp) { int flags; @@ -3062,7 +3062,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (fp) { if ((status = nfs4_check_open(fp, open, &stp))) goto out; - status = nfs4_check_deleg(cl, fp, open, &dp); + status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; } else { -- cgit v1.2.3 From 9411b1d4c7df26dca6bc6261b5dc87a5b4c81e5c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 1 Apr 2013 16:37:12 -0400 Subject: nfsd4: cleanup handling of nfsv4.0 closed stateid's Closed stateid's are kept around a little while to handle close replays in the 4.0 case. So we stash them in the last-used stateid in the oo_last_closed_stateid field of the open owner. We can free that in encode_seqid_op_tail once the seqid on the open owner is next incremented. But we don't want to do that on the close itself; so we set NFS4_OO_PURGE_CLOSE flag set on the open owner, skip freeing it the first time through encode_seqid_op_tail, then when we see that flag set next time we free it. This is unnecessarily baroque. Instead, just move the logic that increments the seqid out of the xdr code and into the operation code itself. The justification given for the current placement is that we need to wait till the last minute to be sure we know whether the status is a sequence-id-mutating error or not, but examination of the code shows that can't actually happen. 
Reported-by: Yanchuan Nian Tested-by: Yanchuan Nian Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 3 ++- fs/nfsd/nfs4state.c | 57 +++++++++++++++++++++++++++++++---------------------- fs/nfsd/nfs4xdr.c | 34 ++++++-------------------------- fs/nfsd/state.h | 4 +--- fs/nfsd/xdr4.h | 1 + 5 files changed, 43 insertions(+), 56 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index a9b707b23858..609e1e211330 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -415,7 +415,8 @@ out: nfsd4_cleanup_open_state(open, status); if (open->op_openowner) cstate->replay_owner = &open->op_openowner->oo_owner; - else + nfsd4_bump_seqid(cstate, status); + if (!cstate->replay_owner) nfs4_unlock_state(); return status; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 795b24d82d18..bcd2339ae8c1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -720,6 +720,28 @@ dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) } #endif +/* + * Bump the seqid on cstate->replay_owner, and clear replay_owner if it + * won't be used for replay. 
+ */ +void nfsd4_bump_seqid(struct nfsd4_compound_state *cstate, __be32 nfserr) +{ + struct nfs4_stateowner *so = cstate->replay_owner; + + if (nfserr == nfserr_replay_me) + return; + + if (!seqid_mutating_err(ntohl(nfserr))) { + cstate->replay_owner = NULL; + return; + } + if (!so) + return; + if (so->so_is_open_owner) + release_last_closed_stateid(openowner(so)); + so->so_seqid++; + return; +} static void gen_sessionid(struct nfsd4_session *ses) @@ -3702,6 +3724,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_client_record_create(oo->oo_owner.so_client); status = nfs_ok; out: + nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) nfs4_unlock_state(); return status; @@ -3785,31 +3808,12 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); status = nfs_ok; out: + nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) nfs4_unlock_state(); return status; } -void nfsd4_purge_closed_stateid(struct nfs4_stateowner *so) -{ - struct nfs4_openowner *oo; - struct nfs4_ol_stateid *s; - - if (!so->so_is_open_owner) - return; - oo = openowner(so); - s = oo->oo_last_closed_stid; - if (!s) - return; - if (!(oo->oo_flags & NFS4_OO_PURGE_CLOSE)) { - /* Release the last_closed_stid on the next seqid bump: */ - oo->oo_flags |= NFS4_OO_PURGE_CLOSE; - return; - } - oo->oo_flags &= ~NFS4_OO_PURGE_CLOSE; - release_last_closed_stateid(oo); -} - static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) { unhash_open_stateid(s); @@ -3838,17 +3842,20 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &close->cl_stateid, NFS4_OPEN_STID|NFS4_CLOSED_STID, &stp, nn); + nfsd4_bump_seqid(cstate, status); if (status) goto out; oo = openowner(stp->st_stateowner); - status = nfs_ok; update_stateid(&stp->st_stid.sc_stateid); memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); nfsd4_close_open_stateid(stp); - 
release_last_closed_stateid(oo); - oo->oo_flags &= ~NFS4_OO_PURGE_CLOSE; - oo->oo_last_closed_stid = stp; + + if (cstate->minorversion) { + unhash_stid(&stp->st_stid); + free_generic_stateid(stp); + } else + oo->oo_last_closed_stid = stp; if (list_empty(&oo->oo_owner.so_stateids)) { if (cstate->minorversion) { @@ -4270,6 +4277,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, out: if (status && new_state) release_lockowner(lock_sop); + nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) nfs4_unlock_state(); if (file_lock) @@ -4439,6 +4447,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); out: + nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) nfs4_unlock_state(); if (file_lock) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 700de0192834..a5e8a6424843 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1701,28 +1701,6 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) \ save = resp->p; -/* - * Routine for encoding the result of a "seqid-mutating" NFSv4 operation. This - * is where sequence id's are incremented, and the replay cache is filled. - * Note that we increment sequence id's here, at the last moment, so we're sure - * we know whether the error to be returned is a sequence id mutating error. 
- */ - -static void encode_seqid_op_tail(struct nfsd4_compoundres *resp, __be32 *save, __be32 nfserr) -{ - struct nfs4_stateowner *stateowner = resp->cstate.replay_owner; - - if (seqid_mutating_err(ntohl(nfserr)) && stateowner) { - stateowner->so_seqid++; - stateowner->so_replay.rp_status = nfserr; - stateowner->so_replay.rp_buflen = - (char *)resp->p - (char *)save; - memcpy(stateowner->so_replay.rp_buf, save, - stateowner->so_replay.rp_buflen); - nfsd4_purge_closed_stateid(stateowner); - } -} - /* Encode as an array of strings the string given with components * separated @sep, escaped with esc_enter and esc_exit. */ @@ -2667,7 +2645,6 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c if (!nfserr) nfsd4_encode_stateid(resp, &close->cl_stateid); - encode_seqid_op_tail(resp, save, nfserr); return nfserr; } @@ -2770,7 +2747,6 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo else if (nfserr == nfserr_denied) nfsd4_encode_lock_denied(resp, &lock->lk_denied); - encode_seqid_op_tail(resp, save, nfserr); return nfserr; } @@ -2790,7 +2766,6 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l if (!nfserr) nfsd4_encode_stateid(resp, &locku->lu_stateid); - encode_seqid_op_tail(resp, save, nfserr); return nfserr; } @@ -2885,7 +2860,6 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op } /* XXX save filehandle here */ out: - encode_seqid_op_tail(resp, save, nfserr); return nfserr; } @@ -2897,7 +2871,6 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct if (!nfserr) nfsd4_encode_stateid(resp, &oc->oc_resp_stateid); - encode_seqid_op_tail(resp, save, nfserr); return nfserr; } @@ -2909,7 +2882,6 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struc if (!nfserr) nfsd4_encode_stateid(resp, &od->od_stateid); - encode_seqid_op_tail(resp, save, nfserr); return nfserr; } @@ -3567,6 +3539,7 @@ 
__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) void nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { + struct nfs4_stateowner *so = resp->cstate.replay_owner; __be32 *statp; __be32 *p; @@ -3583,6 +3556,11 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) /* nfsd4_check_drc_limit guarantees enough room for error status */ if (!op->status) op->status = nfsd4_check_resp_size(resp, 0); + if (so) { + so->so_replay.rp_status = op->status; + so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1); + memcpy(so->so_replay.rp_buf, statp+1, so->so_replay.rp_buflen); + } status: /* * Note: We write the status directly, instead of using WRITE32(), diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 7674bc806200..13ec4853e9af 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -355,7 +355,6 @@ struct nfs4_openowner { struct nfs4_ol_stateid *oo_last_closed_stid; time_t oo_time; /* time of placement on so_close_lru */ #define NFS4_OO_CONFIRMED 1 -#define NFS4_OO_PURGE_CLOSE 2 #define NFS4_OO_NEW 4 unsigned char oo_flags; }; @@ -363,7 +362,7 @@ struct nfs4_openowner { struct nfs4_lockowner { struct nfs4_stateowner lo_owner; /* must be first element */ struct list_head lo_owner_ino_hash; /* hash by owner,file */ - struct list_head lo_perstateid; /* for lockowners only */ + struct list_head lo_perstateid; struct list_head lo_list; /* for temporary uses */ }; @@ -477,7 +476,6 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn); extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); extern void put_client_renew(struct nfs4_client *clp); -extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *); /* nfs4recover operations */ extern int nfsd4_client_tracking_init(struct net *net); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 40e05e6d2518..3b271d2092b6 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -623,6 +623,7 
@@ extern __be32 nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_test_stateid *test_stateid); extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_free_stateid *free_stateid); +extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr); #endif /* -- cgit v1.2.3 From 2c44a23471d048118e49b616d08df0729cdbd9f1 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 9 Apr 2013 14:15:31 +0800 Subject: nfsd: use kmem_cache_free() instead of kfree() memory allocated by kmem_cache_alloc() should be freed using kmem_cache_free(), not kfree(). Signed-off-by: Wei Yongjun Cc: stable@kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index bcd2339ae8c1..9cb9f6e3f5f2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -364,7 +364,7 @@ kmem_cache *slab) min_stateid = 0; return stid; out_free: - kfree(stid); + kmem_cache_free(slab, stid); return NULL; } -- cgit v1.2.3 From c383747ef674467d02dd9c9320a47de2067b0ce3 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 7 Apr 2013 13:21:08 -0400 Subject: nfsd4: remove some redundant comments Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9cb9f6e3f5f2..1226ff6030bd 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4432,17 +4432,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, locku->lu_length); nfs4_transform_lock_offset(file_lock); - /* - * Try to unlock the file in the VFS. - */ err = vfs_lock_file(filp, F_SETLK, file_lock, NULL); if (err) { dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n"); goto out_nfserr; } - /* - * OK, unlock succeeded; the only thing left to do is update the stateid. 
- */ update_stateid(&stp->st_stid.sc_stateid); memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); -- cgit v1.2.3 From 3d74e6a5b6b0d1e4786d1596081bed6ab63a4cac Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 22 Mar 2013 17:44:19 -0400 Subject: nfsd4: no need for replay_owner in sessions case The replay_owner will never be used in the sessions case. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfs4state.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 609e1e211330..c97bb424f55f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -413,7 +413,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, WARN_ON(status && open->op_created); out: nfsd4_cleanup_open_state(open, status); - if (open->op_openowner) + if (open->op_openowner && !nfsd4_has_session(cstate)) cstate->replay_owner = &open->op_openowner->oo_owner; nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1226ff6030bd..16db25dc364f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3666,7 +3666,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, if (status) return status; *stpp = openlockstateid(s); - cstate->replay_owner = (*stpp)->st_stateowner; + if (!nfsd4_has_session(cstate)) + cstate->replay_owner = (*stpp)->st_stateowner; return nfs4_seqid_op_checks(cstate, stateid, seqid, *stpp); } @@ -3858,10 +3859,9 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, oo->oo_last_closed_stid = stp; if (list_empty(&oo->oo_owner.so_stateids)) { - if (cstate->minorversion) { + if (cstate->minorversion) release_openowner(oo); - cstate->replay_owner = NULL; - } else { + else { /* * In the 4.0 case we need to keep the owners around a * little while to handle CLOSE replay. 
-- cgit v1.2.3 From bbc9c36c31fc5827c22359a8b0ba9dd71b5eecfc Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 22 Mar 2013 18:03:49 -0400 Subject: nfsd4: more sessions/open-owner-replay cleanup More logic that's unnecessary in the 4.1 case. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index c97bb424f55f..5dee81141ab7 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -191,9 +191,18 @@ static __be32 nfsd_check_obj_isreg(struct svc_fh *fh) return nfserr_symlink; } +static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh *resfh) +{ + if (nfsd4_has_session(cstate)) + return; + fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, + &resfh->fh_handle); +} + static __be32 -do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open) { + struct svc_fh *current_fh = &cstate->current_fh; struct svc_fh *resfh; int accmode; __be32 status; @@ -252,9 +261,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o if (is_create_with_attrs(open) && open->op_acl != NULL) do_set_nfs4_acl(rqstp, resfh, open->op_acl, open->op_bmval); - /* set reply cache */ - fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, - &resfh->fh_handle); + nfsd4_set_open_owner_reply_cache(cstate, open, resfh); accmode = NFSD_MAY_NOP; if (open->op_created) accmode |= NFSD_MAY_OWNER_OVERRIDE; @@ -268,8 +275,9 @@ out: } static __be32 -do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open) { + struct svc_fh *current_fh = &cstate->current_fh; 
__be32 status; /* We don't know the target directory, and therefore can not @@ -278,9 +286,7 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info)); - /* set replay cache */ - fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, - &current_fh->fh_handle); + nfsd4_set_open_owner_reply_cache(cstate, open, current_fh); open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && (open->op_iattr.ia_size == 0); @@ -372,8 +378,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_DELEGATE_CUR: case NFS4_OPEN_CLAIM_NULL: - status = do_open_lookup(rqstp, &cstate->current_fh, - open); + status = do_open_lookup(rqstp, cstate, open); if (status) goto out; break; @@ -386,8 +391,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; case NFS4_OPEN_CLAIM_FH: case NFS4_OPEN_CLAIM_DELEG_CUR_FH: - status = do_open_fhandle(rqstp, &cstate->current_fh, - open); + status = do_open_fhandle(rqstp, cstate, open); if (status) goto out; break; -- cgit v1.2.3 From eb2099f31b0f090684a64ef8df44a30ff7c45fc2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 7 Apr 2013 13:28:16 -0400 Subject: nfsd4: release lockowners on last unlock in 4.1 case In the 4.1 case we're supposed to release lockowners as soon as they're no longer used. It would probably be more efficient to reference count them, but that's slightly fiddly due to the need to have callbacks from locks.c to take into account lock merging and splitting. For most cases just scanning the inode's lock list on unlock for matching locks will be sufficient. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 16db25dc364f..ff1577d6df62 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4387,6 +4387,7 @@ __be32 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_locku *locku) { + struct nfs4_lockowner *lo; struct nfs4_ol_stateid *stp; struct file *filp = NULL; struct file_lock *file_lock = NULL; @@ -4419,9 +4420,10 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_jukebox; goto out; } + lo = lockowner(stp->st_stateowner); locks_init_lock(file_lock); file_lock->fl_type = F_UNLCK; - file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); + file_lock->fl_owner = (fl_owner_t)lo; file_lock->fl_pid = current->tgid; file_lock->fl_file = filp; file_lock->fl_flags = FL_POSIX; @@ -4440,6 +4442,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, update_stateid(&stp->st_stid.sc_stateid); memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + if (nfsd4_has_session(cstate) && !check_for_locks(stp->st_file, lo)) { + WARN_ON_ONCE(cstate->replay_owner); + release_lockowner(lo); + } + out: nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) -- cgit v1.2.3 From 0c7c3e67ab91ec6caa44bdf1fc89a48012ceb0c5 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 28 Mar 2013 20:37:14 -0400 Subject: nfsd4: don't close read-write opens too soon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't actually close any opens until we don't need them at all. This means being left with write access when it's not really necessary, but that's better than putting a file that might still have posix locks held on it, as we have been. Reported-by: Toralf Förster Cc: stable@kernel.org Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ff1577d6df62..7d2e3b54b9df 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -307,13 +307,7 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) { if (atomic_dec_and_test(&fp->fi_access[oflag])) { nfs4_file_put_fd(fp, oflag); - /* - * It's also safe to get rid of the RDWR open *if* - * we no longer have need of the other kind of access - * or if we already have the other kind of open: - */ - if (fp->fi_fds[1-oflag] - || atomic_read(&fp->fi_access[1 - oflag]) == 0) + if (atomic_read(&fp->fi_access[1 - oflag]) == 0) nfs4_file_put_fd(fp, O_RDWR); } } -- cgit v1.2.3 From 373cd4098a6392395af63af220d989df00b444f7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 8 Apr 2013 15:42:12 -0400 Subject: nfsd4: cleanup check_forechannel_attrs Pass this struct by reference, not by value, and return an error instead of a boolean to allow for future additions. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7d2e3b54b9df..f1262f73f08b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1803,10 +1803,13 @@ nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses, /* seqid, slotID, slotID, slotID, status */ \ 5 ) * sizeof(__be32)) -static bool check_forechannel_attrs(struct nfsd4_channel_attrs fchannel) +static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca) { - return fchannel.maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ - || fchannel.maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ; + if (ca->maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ) + return nfserr_toosmall; + if (ca->maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ) + return nfserr_toosmall; + return nfs_ok; } __be32 @@ -1824,8 +1827,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) return nfserr_inval; - if (check_forechannel_attrs(cr_ses->fore_channel)) - return nfserr_toosmall; + status = check_forechannel_attrs(&cr_ses->fore_channel); + if (status) + return status; new = alloc_session(&cr_ses->fore_channel, nn); if (!new) return nfserr_jukebox; -- cgit v1.2.3 From 55c760cfc40d75b4d8a17d56580ec306db2ab14f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 8 Apr 2013 16:44:14 -0400 Subject: nfsd4: fix forechannel attribute negotiation Negotiation of the 4.1 session forechannel attributes is a mess. Fix: - Move it all into check_forechannel_attrs instead of spreading it between that, alloc_session, and init_forechannel_attrs. - set a minimum "slotsize" so that our drc memory limits apply even for small maxresponsesize_cached. This also fixes some bugs when slotsize becomes <= 0. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 116 ++++++++++++++++++++++------------------------------ 1 file changed, 49 insertions(+), 67 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f1262f73f08b..036d5f16fd7f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -776,17 +776,15 @@ free_session_slots(struct nfsd4_session *ses) * We don't actually need to cache the rpc and session headers, so we * can allocate a little less for each slot: */ -static inline int slot_bytes(struct nfsd4_channel_attrs *ca) +static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca) { - return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; -} - -static int nfsd4_sanitize_slot_size(u32 size) -{ - size -= NFSD_MIN_HDR_SEQ_SZ; /* We don't cache the rpc header */ - size = min_t(u32, size, NFSD_SLOT_CACHE_SIZE); + u32 size; - return size; + if (ca->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ) + size = 0; + else + size = ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; + return size + sizeof(struct nfsd4_slot); } /* @@ -794,12 +792,12 @@ static int nfsd4_sanitize_slot_size(u32 size) * re-negotiate active sessions and reduce their slot usage to make * room for new connections. For now we just fail the create session. 
*/ -static int nfsd4_get_drc_mem(int slotsize, u32 num) +static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca) { + u32 slotsize = slot_bytes(ca); + u32 num = ca->maxreqs; int avail; - num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION); - spin_lock(&nfsd_drc_lock); avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, nfsd_drc_max_mem - nfsd_drc_mem_used); @@ -810,15 +808,19 @@ static int nfsd4_get_drc_mem(int slotsize, u32 num) return num; } -static void nfsd4_put_drc_mem(int slotsize, int num) +static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca) { + int slotsize = slot_bytes(ca); + spin_lock(&nfsd_drc_lock); - nfsd_drc_mem_used -= slotsize * num; + nfsd_drc_mem_used -= slotsize * ca->maxreqs; spin_unlock(&nfsd_drc_lock); } -static struct nfsd4_session *__alloc_session(int slotsize, int numslots) +static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *attrs) { + int numslots = attrs->maxreqs; + int slotsize = slot_bytes(attrs); struct nfsd4_session *new; int mem, i; @@ -831,8 +833,7 @@ static struct nfsd4_session *__alloc_session(int slotsize, int numslots) return NULL; /* allocate each struct nfsd4_slot and data cache in one piece */ for (i = 0; i < numslots; i++) { - mem = sizeof(struct nfsd4_slot) + slotsize; - new->se_slots[i] = kzalloc(mem, GFP_KERNEL); + new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL); if (!new->se_slots[i]) goto out_free; } @@ -844,21 +845,6 @@ out_free: return NULL; } -static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, - struct nfsd4_channel_attrs *req, - int numslots, int slotsize, - struct nfsd_net *nn) -{ - u32 maxrpc = nn->nfsd_serv->sv_max_mesg; - - new->maxreqs = numslots; - new->maxresp_cached = min_t(u32, req->maxresp_cached, - slotsize + NFSD_MIN_HDR_SEQ_SZ); - new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc); - new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc); - new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND); -} - static void free_conn(struct nfsd4_conn *c) 
{ svc_xprt_put(c->cn_xprt); @@ -960,7 +946,6 @@ static void nfsd4_del_conns(struct nfsd4_session *s) static void __free_session(struct nfsd4_session *ses) { - nfsd4_put_drc_mem(slot_bytes(&ses->se_fchannel), ses->se_fchannel.maxreqs); free_session_slots(ses); kfree(ses); } @@ -971,35 +956,10 @@ static void free_session(struct nfsd4_session *ses) lockdep_assert_held(&nn->client_lock); nfsd4_del_conns(ses); + nfsd4_put_drc_mem(&ses->se_fchannel); __free_session(ses); } -static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan, - struct nfsd_net *nn) -{ - struct nfsd4_session *new; - int numslots, slotsize; - /* - * Note decreasing slot size below client's request may - * make it difficult for client to function correctly, whereas - * decreasing the number of slots will (just?) affect - * performance. When short on memory we therefore prefer to - * decrease number of slots instead of their size. - */ - slotsize = nfsd4_sanitize_slot_size(fchan->maxresp_cached); - numslots = nfsd4_get_drc_mem(slotsize, fchan->maxreqs); - if (numslots < 1) - return NULL; - - new = __alloc_session(slotsize, numslots); - if (!new) { - nfsd4_put_drc_mem(slotsize, numslots); - return NULL; - } - init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize, nn); - return new; -} - static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses) { int idx; @@ -1022,7 +982,8 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru list_add(&new->se_perclnt, &clp->cl_sessions); spin_unlock(&clp->cl_lock); spin_unlock(&nn->client_lock); - + memcpy(&new->se_fchannel, &cses->fore_channel, + sizeof(struct nfsd4_channel_attrs)); if (cses->flags & SESSION4_BACK_CHAN) { struct sockaddr *sa = svc_addr(rqstp); /* @@ -1803,12 +1764,33 @@ nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses, /* seqid, slotID, slotID, slotID, status */ \ 5 ) * sizeof(__be32)) -static 
__be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca) +static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn) { + u32 maxrpc = nn->nfsd_serv->sv_max_mesg; + if (ca->maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ) return nfserr_toosmall; if (ca->maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ) return nfserr_toosmall; + ca->headerpadsz = 0; + ca->maxreq_sz = min_t(u32, ca->maxreq_sz, maxrpc); + ca->maxresp_sz = min_t(u32, ca->maxresp_sz, maxrpc); + ca->maxops = min_t(u32, ca->maxops, NFSD_MAX_OPS_PER_COMPOUND); + ca->maxresp_cached = min_t(u32, ca->maxresp_cached, + NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ); + ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION); + /* + * Note decreasing slot size below client's request may make it + * difficult for client to function correctly, whereas + * decreasing the number of slots will (just?) affect + * performance. When short on memory we therefore prefer to + * decrease number of slots instead of their size. 
Clients that + * request larger slots than they need will get poor results: + */ + ca->maxreqs = nfsd4_get_drc_mem(ca); + if (!ca->maxreqs) + return nfserr_jukebox; + return nfs_ok; } @@ -1827,13 +1809,13 @@ nfsd4_create_session(struct svc_rqst *rqstp, if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) return nfserr_inval; - status = check_forechannel_attrs(&cr_ses->fore_channel); + status = check_forechannel_attrs(&cr_ses->fore_channel, nn); if (status) return status; - new = alloc_session(&cr_ses->fore_channel, nn); - if (!new) - return nfserr_jukebox; status = nfserr_jukebox; + new = alloc_session(&cr_ses->fore_channel); + if (!new) + goto out_release_drc_mem; conn = alloc_conn_from_crses(rqstp, cr_ses); if (!conn) goto out_free_session; @@ -1892,8 +1874,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, memcpy(cr_ses->sessionid.data, new->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); - memcpy(&cr_ses->fore_channel, &new->se_fchannel, - sizeof(struct nfsd4_channel_attrs)); cs_slot->sl_seqid++; cr_ses->seqid = cs_slot->sl_seqid; @@ -1906,6 +1886,8 @@ out_free_conn: free_conn(conn); out_free_session: __free_session(new); +out_release_drc_mem: + nfsd4_put_drc_mem(&cr_ses->fore_channel); return status; } -- cgit v1.2.3 From 06b332a52293a45324320b6b446a7fa677fb6702 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 9 Apr 2013 11:34:36 -0400 Subject: nfsd4: check backchannel attributes on create_session Make sure the client gives us an adequate backchannel. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4callback.c | 25 +------------------------ fs/nfsd/nfs4state.c | 25 +++++++++++++++++++++++++ fs/nfsd/xdr4cb.h | 23 +++++++++++++++++++++++ 3 files changed, 49 insertions(+), 24 deletions(-) create mode 100644 fs/nfsd/xdr4cb.h (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index be3ff0f3ff68..7f05cd140de3 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -37,6 +37,7 @@ #include "nfsd.h" #include "state.h" #include "netns.h" +#include "xdr4cb.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -53,30 +54,6 @@ enum { NFSPROC4_CLNT_CB_SEQUENCE, }; -#define NFS4_MAXTAGLEN 20 - -#define NFS4_enc_cb_null_sz 0 -#define NFS4_dec_cb_null_sz 0 -#define cb_compound_enc_hdr_sz 4 -#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) -#define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2) -#define cb_sequence_enc_sz (sessionid_sz + 4 + \ - 1 /* no referring calls list yet */) -#define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4) - -#define op_enc_sz 1 -#define op_dec_sz 2 -#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) -#define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) -#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ - cb_sequence_enc_sz + \ - 1 + enc_stateid_sz + \ - enc_nfs4_fh_sz) - -#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ - cb_sequence_dec_sz + \ - op_dec_sz) - struct nfs4_cb_compound_hdr { /* args */ u32 ident; /* minorversion 0 only */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 036d5f16fd7f..67017fcebb21 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -42,6 +42,7 @@ #include #include #include "xdr4.h" +#include "xdr4cb.h" #include "vfs.h" #include "current_stateid.h" @@ -1794,6 +1795,27 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs return nfs_ok; } +static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca) +{ + ca->headerpadsz = 0; + + /* + * These RPC_MAX_HEADER macros are overkill, 
especially since we + * don't even do gss on the backchannel yet. But this is still + * less than 1k. Tighten up this estimate in the unlikely event + * it turns out to be a problem for some client: + */ + if (ca->maxreq_sz < NFS4_enc_cb_recall_sz + RPC_MAX_HEADER_WITH_AUTH) + return nfserr_toosmall; + if (ca->maxresp_sz < NFS4_dec_cb_recall_sz + RPC_MAX_REPHEADER_WITH_AUTH) + return nfserr_toosmall; + ca->maxresp_cached = 0; + if (ca->maxops < 2) + return nfserr_toosmall; + + return nfs_ok; +} + __be32 nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -1810,6 +1832,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) return nfserr_inval; status = check_forechannel_attrs(&cr_ses->fore_channel, nn); + if (status) + return status; + status = check_backchannel_attrs(&cr_ses->back_channel); if (status) return status; status = nfserr_jukebox; diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h new file mode 100644 index 000000000000..c5c55dfb91a9 --- /dev/null +++ b/fs/nfsd/xdr4cb.h @@ -0,0 +1,23 @@ +#define NFS4_MAXTAGLEN 20 + +#define NFS4_enc_cb_null_sz 0 +#define NFS4_dec_cb_null_sz 0 +#define cb_compound_enc_hdr_sz 4 +#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) +#define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2) +#define cb_sequence_enc_sz (sessionid_sz + 4 + \ + 1 /* no referring calls list yet */) +#define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4) + +#define op_enc_sz 1 +#define op_dec_sz 2 +#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) +#define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) +#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ + cb_sequence_enc_sz + \ + 1 + enc_stateid_sz + \ + enc_nfs4_fh_sz) + +#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ + cb_sequence_dec_sz + \ + op_dec_sz) -- cgit v1.2.3 From 23340032e64d70ce76817a88e8193c8040b095cf Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Tue, 9 Apr 2013 17:42:28 -0400 Subject: nfsd4: clean up validate_stateid The logic here is better expressed with a switch statement. While we're here, CLOSED stateids (or stateids of an unknown type--which would indicate a server bug) should probably return nfserr_bad_stateid, though this behavior shouldn't affect any non-buggy client. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 67017fcebb21..add9721ab059 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3456,13 +3456,22 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) status = check_stateid_generation(stateid, &s->sc_stateid, 1); if (status) return status; - if (!(s->sc_type & (NFS4_OPEN_STID | NFS4_LOCK_STID))) + switch (s->sc_type) { + case NFS4_DELEG_STID: + return nfs_ok; + case NFS4_OPEN_STID: + case NFS4_LOCK_STID: + ols = openlockstateid(s); + if (ols->st_stateowner->so_is_open_owner + && !(openowner(ols->st_stateowner)->oo_flags + & NFS4_OO_CONFIRMED)) + return nfserr_bad_stateid; return nfs_ok; - ols = openlockstateid(s); - if (ols->st_stateowner->so_is_open_owner - && !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) + default: + printk("unknown stateid type %x\n", s->sc_type); + case NFS4_CLOSED_STID: return nfserr_bad_stateid; - return nfs_ok; + } } static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, -- cgit v1.2.3 From 3bd64a5ba1719c2bb6cba4493dfd3e23a7653e54 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 9 Apr 2013 17:02:51 -0400 Subject: nfsd4: implement SEQ4_STATUS_RECALLABLE_STATE_REVOKED A 4.1 server must notify a client that has had any state revoked using the SEQ4_STATUS_RECALLABLE_STATE_REVOKED flag.
The client can figure out exactly which state is the problem using CHECK_STATEID and then free it using FREE_STATEID. The status flag will be unset once all such revoked stateids are freed. Our server's only recallable state is delegations. So we keep with each 4.1 client a list of delegations that have timed out and been recalled, but haven't yet been freed by FREE_STATEID. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 55 +++++++++++++++++++++++++++++++++++++++++++++-------- fs/nfsd/state.h | 3 +++ 2 files changed, 50 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index add9721ab059..3b84700d1bd7 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -445,7 +445,6 @@ static void unhash_stid(struct nfs4_stid *s) static void unhash_delegation(struct nfs4_delegation *dp) { - unhash_stid(&dp->dl_stid); list_del_init(&dp->dl_perclnt); spin_lock(&recall_lock); list_del_init(&dp->dl_perfile); @@ -454,10 +453,37 @@ unhash_delegation(struct nfs4_delegation *dp) nfs4_put_deleg_lease(dp->dl_file); put_nfs4_file(dp->dl_file); dp->dl_file = NULL; +} + + + +static void destroy_revoked_delegation(struct nfs4_delegation *dp) +{ + list_del_init(&dp->dl_recall_lru); remove_stid(&dp->dl_stid); nfs4_put_delegation(dp); } +static void destroy_delegation(struct nfs4_delegation *dp) +{ + unhash_delegation(dp); + remove_stid(&dp->dl_stid); + nfs4_put_delegation(dp); +} + +static void revoke_delegation(struct nfs4_delegation *dp) +{ + struct nfs4_client *clp = dp->dl_stid.sc_client; + + if (clp->cl_minorversion == 0) + destroy_delegation(dp); + else { + unhash_delegation(dp); + dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; + list_add(&dp->dl_recall_lru, &clp->cl_revoked); + } +} + /* * SETCLIENTID state */ @@ -1114,7 +1140,7 @@ destroy_client(struct nfs4_client *clp) spin_unlock(&recall_lock); while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); - 
unhash_delegation(dp); + destroy_delegation(dp); } while (!list_empty(&clp->cl_openowners)) { oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient); @@ -1310,6 +1336,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, INIT_LIST_HEAD(&clp->cl_delegations); INIT_LIST_HEAD(&clp->cl_lru); INIT_LIST_HEAD(&clp->cl_callbacks); + INIT_LIST_HEAD(&clp->cl_revoked); spin_lock_init(&clp->cl_lock); nfsd4_init_callback(&clp->cl_cb_null); clp->cl_time = get_seconds(); @@ -2171,6 +2198,8 @@ out: default: seq->status_flags = 0; } + if (!list_empty(&clp->cl_revoked)) + seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED; out_no_session: kfree(conn); spin_unlock(&nn->client_lock); @@ -3297,7 +3326,7 @@ nfs4_laundromat(struct nfsd_net *nn) spin_unlock(&recall_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - unhash_delegation(dp); + revoke_delegation(dp); } test_val = nn->nfsd4_lease; list_for_each_safe(pos, next, &nn->close_lru) { @@ -3459,6 +3488,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) switch (s->sc_type) { case NFS4_DELEG_STID: return nfs_ok; + case NFS4_REVOKED_DELEG_STID: + return nfserr_deleg_revoked; case NFS4_OPEN_STID: case NFS4_LOCK_STID: ols = openlockstateid(s); @@ -3602,6 +3633,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { stateid_t *stateid = &free_stateid->fr_stateid; struct nfs4_stid *s; + struct nfs4_delegation *dp; struct nfs4_client *cl = cstate->session->se_client; __be32 ret = nfserr_bad_stateid; @@ -3623,6 +3655,11 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, else ret = nfserr_locks_held; break; + case NFS4_REVOKED_DELEG_STID: + dp = delegstateid(s); + destroy_revoked_delegation(dp); + ret = nfs_ok; + break; default: ret = nfserr_bad_stateid; } @@ -3647,10 +3684,12 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state 
*cstate, stateid_ status = nfsd4_check_seqid(cstate, sop, seqid); if (status) return status; - if (stp->st_stid.sc_type == NFS4_CLOSED_STID) + if (stp->st_stid.sc_type == NFS4_CLOSED_STID + || stp->st_stid.sc_type == NFS4_REVOKED_DELEG_STID) /* * "Closed" stateid's exist *only* to return - * nfserr_replay_me from the previous step. + * nfserr_replay_me from the previous step, and + * revoked delegations are kept only for free_stateid. */ return nfserr_bad_stateid; status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); @@ -3913,7 +3952,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - unhash_delegation(dp); + destroy_delegation(dp); out: nfs4_unlock_state(); @@ -4763,7 +4802,7 @@ u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max) spin_unlock(&recall_lock); list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) - unhash_delegation(dp); + revoke_delegation(dp); return count; } @@ -5018,7 +5057,7 @@ nfs4_state_shutdown_net(struct net *net) spin_unlock(&recall_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - unhash_delegation(dp); + destroy_delegation(dp); } nfsd4_client_tracking_exit(net); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 13ec4853e9af..274e2a114e05 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -79,6 +79,8 @@ struct nfs4_stid { #define NFS4_DELEG_STID 4 /* For an open stateid kept around *only* to process close replays: */ #define NFS4_CLOSED_STID 8 +/* For a deleg stateid kept around only to process free_stateid's: */ +#define NFS4_REVOKED_DELEG_STID 16 unsigned char sc_type; stateid_t sc_stateid; struct nfs4_client *sc_client; @@ -238,6 +240,7 @@ struct nfs4_client { struct list_head cl_openowners; struct idr cl_stateids; /* stateid lookup */ struct list_head cl_delegations; + struct list_head cl_revoked; /* unacknowledged, revoked 4.1 state */ struct 
list_head cl_lru; /* tail queue */ struct xdr_netobj cl_name; /* id generated by client */ nfs4_verifier cl_verifier; /* generated by client */ -- cgit v1.2.3 From 53584f66529d4c5940901e4ffe98ad7012dc6e0c Mon Sep 17 00:00:00 2001 From: fanchaoting Date: Thu, 11 Apr 2013 21:24:13 +0800 Subject: nfsd4: remove some useless code The "list_empty(&oo->oo_owner.so_stateids)" is always true, so remove it. Signed-off-by: fanchaoting Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3b84700d1bd7..a7954913b332 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3919,8 +3919,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * In the 4.0 case we need to keep the owners around a * little while to handle CLOSE replay. */ - if (list_empty(&oo->oo_owner.so_stateids)) - move_to_close_lru(oo, SVC_NET(rqstp)); + move_to_close_lru(oo, SVC_NET(rqstp)); } } out: -- cgit v1.2.3 From 9aeb5aeeb09d59794896ccefd60d58c44987f52f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 16 Apr 2013 21:29:03 -0400 Subject: nfsd4: remove unused macro Cleanup a piece I forgot to remove in 9411b1d4c7df26dca6bc6261b5dc87a5b4c81e5c "nfsd4: cleanup handling of nfsv4.0 closed stateid's". Signed-off-by: J.
Bruce Fields --- fs/nfsd/nfs4xdr.c | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a5e8a6424843..1cf154511dae 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1693,14 +1693,6 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) } while (0) #define ADJUST_ARGS() resp->p = p -/* - * Header routine to setup seqid operation replay cache - */ -#define ENCODE_SEQID_OP_HEAD \ - __be32 *save; \ - \ - save = resp->p; - /* Encode as an array of strings the string given with components * separated @sep, escaped with esc_enter and esc_exit. */ @@ -2640,8 +2632,6 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, static __be32 nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) { - ENCODE_SEQID_OP_HEAD; - if (!nfserr) nfsd4_encode_stateid(resp, &close->cl_stateid); @@ -2740,8 +2730,6 @@ nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denie static __be32 nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock) { - ENCODE_SEQID_OP_HEAD; - if (!nfserr) nfsd4_encode_stateid(resp, &lock->lk_resp_stateid); else if (nfserr == nfserr_denied) @@ -2761,8 +2749,6 @@ nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l static __be32 nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) { - ENCODE_SEQID_OP_HEAD; - if (!nfserr) nfsd4_encode_stateid(resp, &locku->lu_stateid); @@ -2788,7 +2774,6 @@ static __be32 nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) { __be32 *p; - ENCODE_SEQID_OP_HEAD; if (nfserr) goto out; @@ -2866,8 +2851,6 @@ out: static __be32 nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) { - ENCODE_SEQID_OP_HEAD; - if (!nfserr) nfsd4_encode_stateid(resp, &oc->oc_resp_stateid); 
@@ -2877,8 +2860,6 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct static __be32 nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) { - ENCODE_SEQID_OP_HEAD; - if (!nfserr) nfsd4_encode_stateid(resp, &od->od_stateid); -- cgit v1.2.3 From ba138435d1f8b25aa2b787848ee939270a50e34f Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Tue, 16 Apr 2013 22:14:15 -0400 Subject: nfsd4: put_client_renew_locked can be static Reported-by: Fengguang Wu Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a7954913b332..e39bf5381bca 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -193,7 +193,7 @@ renew_client(struct nfs4_client *clp) spin_unlock(&nn->client_lock); } -void put_client_renew_locked(struct nfs4_client *clp) +static void put_client_renew_locked(struct nfs4_client *clp) { if (!atomic_dec_and_test(&clp->cl_refcount)) return; -- cgit v1.2.3 From bf8d909705e9d9bac31d9b8eac6734d2b51332a7 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Fri, 19 Apr 2013 16:09:38 -0400 Subject: nfsd: Decode and send 64bit time values The seconds field of an nfstime4 structure is 64bit, but we are assuming that the first 32bits are zero-filled. So if the client tries to set atime to a value before the epoch (touch -t 196001010101), then the server will save the wrong value on disk. Signed-off-by: Bryan Schumaker Cc: stable@kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1cf154511dae..888a600dad8c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -344,10 +344,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, all 32 bits of 'nseconds'. 
*/ READ_BUF(12); len += 12; - READ32(dummy32); - if (dummy32) - return nfserr_inval; - READ32(iattr->ia_atime.tv_sec); + READ64(iattr->ia_atime.tv_sec); READ32(iattr->ia_atime.tv_nsec); if (iattr->ia_atime.tv_nsec >= (u32)1000000000) return nfserr_inval; @@ -370,10 +367,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, all 32 bits of 'nseconds'. */ READ_BUF(12); len += 12; - READ32(dummy32); - if (dummy32) - return nfserr_inval; - READ32(iattr->ia_mtime.tv_sec); + READ64(iattr->ia_mtime.tv_sec); READ32(iattr->ia_mtime.tv_nsec); if (iattr->ia_mtime.tv_nsec >= (u32)1000000000) return nfserr_inval; @@ -2372,8 +2366,7 @@ out_acl: if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { if ((buflen -= 12) < 0) goto out_resource; - WRITE32(0); - WRITE32(stat.atime.tv_sec); + WRITE64((s64)stat.atime.tv_sec); WRITE32(stat.atime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { @@ -2386,15 +2379,13 @@ out_acl: if (bmval1 & FATTR4_WORD1_TIME_METADATA) { if ((buflen -= 12) < 0) goto out_resource; - WRITE32(0); - WRITE32(stat.ctime.tv_sec); + WRITE64((s64)stat.ctime.tv_sec); WRITE32(stat.ctime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { if ((buflen -= 12) < 0) goto out_resource; - WRITE32(0); - WRITE32(stat.mtime.tv_sec); + WRITE64((s64)stat.mtime.tv_sec); WRITE32(stat.mtime.tv_nsec); } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { -- cgit v1.2.3 From aa387d6ce15330e09037947147c5a5a2ba42a0e8 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 15 Apr 2013 16:03:46 -0400 Subject: nfsd: fix EXDEV checking in rename We again check for the EXDEV a little later on, so the first check is redundant. This check is also slightly racier, since a badly timed eviction from the export cache could leave us with the two fh_export pointers pointing to two different cache entries which each refer to the same underlying export. 
It's better to compare vfsmounts as the later check does, but that leaves a minor security hole in the case where the two exports refer to two different directories especially if (for example) they have different root-squashing options. So, compare ex_path.dentry too. Reported-by: Joe Habermann Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 2b2e2396a869..84ce601d8063 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1758,10 +1758,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, tdentry = tfhp->fh_dentry; tdir = tdentry->d_inode; - err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev; - if (ffhp->fh_export != tfhp->fh_export) - goto out; - err = nfserr_perm; if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) goto out; @@ -1802,6 +1798,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, host_err = -EXDEV; if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) goto out_dput_new; + if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) + goto out_dput_new; host_err = nfsd_break_lease(odentry->d_inode); if (host_err) -- cgit v1.2.3 From dd30333cf5a2f9dfecda5c6f4523133f13847aae Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 12 Apr 2013 18:10:56 -0400 Subject: nfsd4: better error return to indicate SSV non-support As 4.1 becomes less experimental and SSV still isn't implemented, we have to admit it's not going to be, and return some sensible error rather than just saying "our server's broken". Discussion in the ietf group hasn't turned up any objections to using NFS4ERR_ENC_ALG_UNSUPP for that purpose. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e39bf5381bca..a964a1761077 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1651,6 +1651,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, default: /* checked by xdr code */ WARN_ON_ONCE(1); case SP4_SSV: + return nfserr_encr_alg_unsupp; case SP4_MACH_CRED: return nfserr_serverfault; /* no excuse :-/ */ } -- cgit v1.2.3 From 2a6cf944c2f8ad5a7ef599ed275b85fa56eba3fc Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 30 Apr 2013 15:28:51 -0400 Subject: nfsd4: don't remap EISDIR errors in rename We're going out of our way here to remap an error to make rfc 3530 happy--but the rfc itself (nor rfc 1813, which has similar language) gives no justification. And disagrees with local filesystem behavior, with Linux and posix man pages, and knfsd's implemented behavior for v2 and v3. And the documented behavior seems better, in that it gives a little more information--you could implement the 3530 behavior using the posix behavior, but not the other way around. Also, the Linux client makes no attempt to remap this error in the v4 case, so it can end up just returning EEXIST to the application in a case where it should return EISDIR. So honestly I think the rfc's are just buggy here--or in any case it doesn't seem worth the trouble to remap this error. Reported-by: Frank S Filz Signed-off-by: J.
Bruce Fields --- fs/nfsd/nfs4proc.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5dee81141ab7..8ae5abfe6ba2 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -813,21 +813,11 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, rename->rn_snamelen, &cstate->current_fh, rename->rn_tname, rename->rn_tnamelen); - - /* the underlying filesystem returns different error's than required - * by NFSv4. both save_fh and current_fh have been verified.. */ - if (status == nfserr_isdir) - status = nfserr_exist; - else if ((status == nfserr_notdir) && - (S_ISDIR(cstate->save_fh.fh_dentry->d_inode->i_mode) && - S_ISDIR(cstate->current_fh.fh_dentry->d_inode->i_mode))) - status = nfserr_exist; - - if (!status) { - set_change_info(&rename->rn_sinfo, &cstate->current_fh); - set_change_info(&rename->rn_tinfo, &cstate->save_fh); - } - return status; + if (status) + return status; + set_change_info(&rename->rn_sinfo, &cstate->current_fh); + set_change_info(&rename->rn_tinfo, &cstate->save_fh); + return nfs_ok; } static __be32 -- cgit v1.2.3 From c8c797f9fdf655245c0d60667af6efa7b32c98f1 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 5 Apr 2013 21:22:39 +0800 Subject: nfsd: make symbol nfsd_reply_cache_shrinker static symbol 'nfsd_reply_cache_shrinker' only used within this file. It should be static. Signed-off-by: Wei Yongjun Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfscache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index eb2587745a64..e76244edd748 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -62,7 +62,7 @@ static void cache_cleaner_func(struct work_struct *unused); static int nfsd_reply_cache_shrink(struct shrinker *shrink, struct shrink_control *sc); -struct shrinker nfsd_reply_cache_shrinker = { +static struct shrinker nfsd_reply_cache_shrinker = { .shrink = nfsd_reply_cache_shrink, .seeks = 1, }; -- cgit v1.2.3 From ed9411a00464860cafe7e07224818cdf04fd9e89 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 30 Apr 2013 18:48:45 -0400 Subject: NFSD: Simplify GSS flavor encoding in nfsd4_do_encode_secinfo() Clean up. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 78272185a13d..a885e97dc5f4 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3119,17 +3119,11 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, struct rpcsec_gss_info info; if (rpcauth_get_gssinfo(flavs[i].pseudoflavor, &info) == 0) { - RESERVE_SPACE(4); + RESERVE_SPACE(4 + 4 + info.oid.len + 4 + 4); WRITE32(RPC_AUTH_GSS); - ADJUST_ARGS(); - RESERVE_SPACE(4 + info.oid.len); WRITE32(info.oid.len); WRITEMEM(info.oid.data, info.oid.len); - ADJUST_ARGS(); - RESERVE_SPACE(4); WRITE32(info.qop); - ADJUST_ARGS(); - RESERVE_SPACE(4); WRITE32(info.service); ADJUST_ARGS(); } else { -- cgit v1.2.3 From 676e4ebd5f2c3b4fd1d2bff79b68385c23c5c105 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 30 Apr 2013 18:48:54 -0400 Subject: NFSD: SECINFO doesn't handle unsupported pseudoflavors correctly If nfsd4_do_encode_secinfo() can't find GSS info that matches an export security flavor, it assumes the flavor is not a GSS pseudoflavor, and simply puts it on the wire. 
However, if this XDR encoding logic is given a legitimate GSS pseudoflavor but the RPC layer says it does not support that pseudoflavor for some reason, then the server leaks GSS pseudoflavor numbers onto the wire. I confirmed this happens by blacklisting rpcsec_gss_krb5, then attempted a client transition from the pseudo-fs to a Kerberos-only share. The client received a flavor list containing the Kerberos pseudoflavor numbers, rather than GSS tuples. The encoder logic can check that each pseudoflavor in flavs[] is less than MAXFLAVOR before writing it into the buffer, to prevent this. But after "nflavs" is written into the XDR buffer, the encoder can't skip writing flavor information into the buffer when it discovers the RPC layer doesn't support that flavor. So count the number of valid flavors as they are written into the XDR buffer, then write that count into a placeholder in the XDR buffer when all recognized flavors have been encoded. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a885e97dc5f4..6cd86e0fe450 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3085,10 +3085,11 @@ static __be32 nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_export *exp) { - u32 i, nflavs; + u32 i, nflavs, supported; struct exp_flavor_info *flavs; struct exp_flavor_info def_flavs[2]; - __be32 *p; + __be32 *p, *flavorsp; + static bool report = true; if (nfserr) goto out; @@ -3112,13 +3113,17 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, } } + supported = 0; RESERVE_SPACE(4); - WRITE32(nflavs); + flavorsp = p++; /* to be backfilled later */ ADJUST_ARGS(); + for (i = 0; i < nflavs; i++) { + rpc_authflavor_t pf = flavs[i].pseudoflavor; struct rpcsec_gss_info info; - if (rpcauth_get_gssinfo(flavs[i].pseudoflavor, &info) == 0) { + if 
(rpcauth_get_gssinfo(pf, &info) == 0) { + supported++; RESERVE_SPACE(4 + 4 + info.oid.len + 4 + 4); WRITE32(RPC_AUTH_GSS); WRITE32(info.oid.len); @@ -3126,13 +3131,22 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, WRITE32(info.qop); WRITE32(info.service); ADJUST_ARGS(); - } else { + } else if (pf < RPC_AUTH_MAXFLAVOR) { + supported++; RESERVE_SPACE(4); - WRITE32(flavs[i].pseudoflavor); + WRITE32(pf); ADJUST_ARGS(); + } else { + if (report) + pr_warn("NFS: SECINFO: security flavor %u " + "is not supported\n", pf); } } + if (nflavs != supported) + report = false; + *flavorsp = htonl(supported); + out: if (exp) exp_put(exp); -- cgit v1.2.3