Linux-2.6.12-rc2v2.6.12-rc2

Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/nfsd
20 files changed, 16836 insertions, 0 deletions
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
new file mode 100644
index 000000000000..b8680a247f8b
--- /dev/null
+++ b/fs/nfsd/Makefile
@@ -0,0 +1,12 @@
+#
+# Makefile for the Linux nfs server
+#
+
+obj-$(CONFIG_NFSD)	+= nfsd.o
+
+nfsd-y 			:= nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
+			   export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
+nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs3xdr.o
+nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
+			   nfs4acl.o nfs4callback.o
+nfsd-objs		:= $(nfsd-y)
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
new file mode 100644
index 000000000000..cfe9ce881613
--- /dev/null
+++ b/fs/nfsd/auth.c
@@ -0,0 +1,63 @@
+/*
+ * linux/fs/nfsd/auth.c
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svcauth.h>
+#include <linux/nfsd/nfsd.h>
+
+#define	CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE))
+
+int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
+{
+	struct svc_cred	*cred = &rqstp->rq_cred;
+	int i;
+	int ret;
+
+	if (exp->ex_flags & NFSEXP_ALLSQUASH) {
+		cred->cr_uid = exp->ex_anon_uid;
+		cred->cr_gid = exp->ex_anon_gid;
+		put_group_info(cred->cr_group_info);
+		cred->cr_group_info = groups_alloc(0);
+	} else if (exp->ex_flags & NFSEXP_ROOTSQUASH) {
+		struct group_info *gi;
+		if (!cred->cr_uid)
+			cred->cr_uid = exp->ex_anon_uid;
+		if (!cred->cr_gid)
+			cred->cr_gid = exp->ex_anon_gid;
+		gi = groups_alloc(cred->cr_group_info->ngroups);
+		if (gi)
+			for (i = 0; i < cred->cr_group_info->ngroups; i++) {
+				if (!GROUP_AT(cred->cr_group_info, i))
+					GROUP_AT(gi, i) = exp->ex_anon_gid;
+				else
+					GROUP_AT(gi, i) = GROUP_AT(cred->cr_group_info, i);
+			}
+		put_group_info(cred->cr_group_info);
+		cred->cr_group_info = gi;
+	}
+
+	if (cred->cr_uid != (uid_t) -1)
+		current->fsuid = cred->cr_uid;
+	else
+		current->fsuid = exp->ex_anon_uid;
+	if (cred->cr_gid != (gid_t) -1)
+		current->fsgid = cred->cr_gid;
+	else
+		current->fsgid = exp->ex_anon_gid;
+
+	if (!cred->cr_group_info)
+		return -ENOMEM;
+	ret = set_current_groups(cred->cr_group_info);
+	if ((cred->cr_uid)) {
+		cap_t(current->cap_effective) &= ~CAP_NFSD_MASK;
+	} else {
+		cap_t(current->cap_effective) |= (CAP_NFSD_MASK &
+						  current->cap_permitted);
+	}
+	return ret;
+}
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
new file mode 100644
index 000000000000..9a11aa39e2e4
--- /dev/null
+++ b/fs/nfsd/export.c
@@ -0,0 +1,1200 @@
+#define MSNFS	/* HACK HACK */
+/*
+ * linux/fs/nfsd/export.c
+ *
+ * NFS exporting and validation.
+ *
+ * We maintain a list of clients, each of which has a list of
+ * exports. To export an fs to a given client, you first have
+ * to create the client entry with NFSCTL_ADDCLIENT, which
+ * creates a client control block and adds it to the hash
+ * table. Then, you call NFSCTL_EXPORT for each fs.
+ *
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch, <okir@monad.swb.de>
+ */
+
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/in.h>
+#include <linux/seq_file.h>
+#include <linux/syscalls.h>
+#include <linux/rwsem.h>
+#include <linux/dcache.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/hash.h>
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/nfsfh.h>
+#include <linux/nfsd/syscall.h>
+#include <linux/lockd/bind.h>
+
+#define NFSDDBG_FACILITY	NFSDDBG_EXPORT
+#define NFSD_PARANOIA 1
+
+typedef struct auth_domain	svc_client;
+typedef struct svc_export	svc_export;
+
+static void		exp_do_unexport(svc_export *unexp);
+static int		exp_verify_string(char *cp, int max);
+
+/*
+ * We have two caches.
+ * One maps client+vfsmnt+dentry to export options - the export map
+ * The other maps client+filehandle-fragment to export options. - the expkey map
+ *
+ * The export options are actually stored in the first map, and the
+ * second map contains a reference to the entry in the first map.
+ */
+
+#define	EXPKEY_HASHBITS		8
+#define	EXPKEY_HASHMAX		(1 << EXPKEY_HASHBITS)
+#define	EXPKEY_HASHMASK		(EXPKEY_HASHMAX -1)
+static struct cache_head *expkey_table[EXPKEY_HASHMAX];
+
+static inline int svc_expkey_hash(struct svc_expkey *item)
+{
+	int hash = item->ek_fsidtype;
+	char * cp = (char*)item->ek_fsid;
+	int len = key_len(item->ek_fsidtype);
+
+	hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
+	hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS);
+	return hash & EXPKEY_HASHMASK;
+}
+
+void expkey_put(struct cache_head *item, struct cache_detail *cd)
+{
+	if (cache_put(item, cd)) {
+		struct svc_expkey *key = container_of(item, struct svc_expkey, h);
+		if (test_bit(CACHE_VALID, &item->flags) &&
+		    !test_bit(CACHE_NEGATIVE, &item->flags))
+			exp_put(key->ek_export);
+		auth_domain_put(key->ek_client);
+		kfree(key);
+	}
+}
+
+static void expkey_request(struct cache_detail *cd,
+			   struct cache_head *h,
+			   char **bpp, int *blen)
+{
+	/* client fsidtype \xfsid */
+	struct svc_expkey *ek = container_of(h, struct svc_expkey, h);
+	char type[5];
+
+	qword_add(bpp, blen, ek->ek_client->name);
+	snprintf(type, 5, "%d", ek->ek_fsidtype);
+	qword_add(bpp, blen, type);
+	qword_addhex(bpp, blen, (char*)ek->ek_fsid, key_len(ek->ek_fsidtype));
+	(*bpp)[-1] = '\n';
+}
+
+static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *, int);
+static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
+{
+	/* client fsidtype fsid [path] */
+	char *buf;
+	int len;
+	struct auth_domain *dom = NULL;
+	int err;
+	int fsidtype;
+	char *ep;
+	struct svc_expkey key;
+
+	if (mesg[mlen-1] != '\n')
+		return -EINVAL;
+	mesg[mlen-1] = 0;
+
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	err = -ENOMEM;
+	if (!buf) goto out;
+
+	err = -EINVAL;
+	if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+		goto out;
+
+	err = -ENOENT;
+	dom = auth_domain_find(buf);
+	if (!dom)
+		goto out;
+	dprintk("found domain %s\n", buf);
+
+	err = -EINVAL;
+	if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+		goto out;
+	fsidtype = simple_strtoul(buf, &ep, 10);
+	if (*ep)
+		goto out;
+	dprintk("found fsidtype %d\n", fsidtype);
+	if (fsidtype > 2)
+		goto out;
+	if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+		goto out;
+	dprintk("found fsid length %d\n", len);
+	if (len != key_len(fsidtype))
+		goto out;
+
+	/* OK, we seem to have a valid key */
+	key.h.flags = 0;
+	key.h.expiry_time = get_expiry(&mesg);
+	if (key.h.expiry_time == 0)
+		goto out;
+
+	key.ek_client = dom;	
+	key.ek_fsidtype = fsidtype;
+	memcpy(key.ek_fsid, buf, len);
+
+	/* now we want a pathname, or empty meaning NEGATIVE  */
+	if ((len=qword_get(&mesg, buf, PAGE_SIZE)) < 0)
+		goto out;
+	dprintk("Path seems to be <%s>\n", buf);
+	err = 0;
+	if (len == 0) {
+		struct svc_expkey *ek;
+		set_bit(CACHE_NEGATIVE, &key.h.flags);
+		ek = svc_expkey_lookup(&key, 1);
+		if (ek)
+			expkey_put(&ek->h, &svc_expkey_cache);
+	} else {
+		struct nameidata nd;
+		struct svc_expkey *ek;
+		struct svc_export *exp;
+		err = path_lookup(buf, 0, &nd);
+		if (err)
+			goto out;
+
+		dprintk("Found the path %s\n", buf);
+		exp = exp_get_by_name(dom, nd.mnt, nd.dentry, NULL);
+
+		err = -ENOENT;
+		if (!exp)
+			goto out_nd;
+		key.ek_export = exp;
+		dprintk("And found export\n");
+		
+		ek = svc_expkey_lookup(&key, 1);
+		if (ek)
+			expkey_put(&ek->h, &svc_expkey_cache);
+		exp_put(exp);
+		err = 0;
+	out_nd:
+		path_release(&nd);
+	}
+	cache_flush();
+ out:
+	if (dom)
+		auth_domain_put(dom);
+	if (buf)
+		kfree(buf);
+	return err;
+}
+
+static int expkey_show(struct seq_file *m,
+		       struct cache_detail *cd,
+		       struct cache_head *h)
+{
+	struct svc_expkey *ek ;
+
+	if (h ==NULL) {
+		seq_puts(m, "#domain fsidtype fsid [path]\n");
+		return 0;
+	}
+	ek = container_of(h, struct svc_expkey, h);
+	seq_printf(m, "%s %d 0x%08x", ek->ek_client->name,
+		   ek->ek_fsidtype, ek->ek_fsid[0]);
+	if (ek->ek_fsidtype != 1)
+		seq_printf(m, "%08x", ek->ek_fsid[1]);
+	if (ek->ek_fsidtype == 2)
+		seq_printf(m, "%08x", ek->ek_fsid[2]);
+	if (test_bit(CACHE_VALID, &h->flags) && 
+	    !test_bit(CACHE_NEGATIVE, &h->flags)) {
+		seq_printf(m, " ");
+		seq_path(m, ek->ek_export->ex_mnt, ek->ek_export->ex_dentry, "\\ \t\n");
+	}
+	seq_printf(m, "\n");
+	return 0;
+}
+	
+struct cache_detail svc_expkey_cache = {
+	.hash_size	= EXPKEY_HASHMAX,
+	.hash_table	= expkey_table,
+	.name		= "nfsd.fh",
+	.cache_put	= expkey_put,
+	.cache_request	= expkey_request,
+	.cache_parse	= expkey_parse,
+	.cache_show	= expkey_show,
+};
+
+static inline int svc_expkey_match (struct svc_expkey *a, struct svc_expkey *b)
+{
+	if (a->ek_fsidtype != b->ek_fsidtype ||
+	    a->ek_client != b->ek_client ||
+	    memcmp(a->ek_fsid, b->ek_fsid, key_len(a->ek_fsidtype)) != 0)
+		return 0;
+	return 1;
+}
+
+static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *item)
+{
+	cache_get(&item->ek_client->h);
+	new->ek_client = item->ek_client;
+	new->ek_fsidtype = item->ek_fsidtype;
+	new->ek_fsid[0] = item->ek_fsid[0];
+	new->ek_fsid[1] = item->ek_fsid[1];
+	new->ek_fsid[2] = item->ek_fsid[2];
+}
+
+static inline void svc_expkey_update(struct svc_expkey *new, struct svc_expkey *item)
+{
+	cache_get(&item->ek_export->h);
+	new->ek_export = item->ek_export;
+}
+
+static DefineSimpleCacheLookup(svc_expkey,0) /* no inplace updates */
+
+#define	EXPORT_HASHBITS		8
+#define	EXPORT_HASHMAX		(1<< EXPORT_HASHBITS)
+#define	EXPORT_HASHMASK		(EXPORT_HASHMAX -1)
+
+static struct cache_head *export_table[EXPORT_HASHMAX];
+
+static inline int svc_export_hash(struct svc_export *item)
+{
+	int rv;
+
+	rv = hash_ptr(item->ex_client, EXPORT_HASHBITS);
+	rv ^= hash_ptr(item->ex_dentry, EXPORT_HASHBITS);
+	rv ^= hash_ptr(item->ex_mnt, EXPORT_HASHBITS);
+	return rv;
+}
+
+void svc_export_put(struct cache_head *item, struct cache_detail *cd)
+{
+	if (cache_put(item, cd)) {
+		struct svc_export *exp = container_of(item, struct svc_export, h);
+		dput(exp->ex_dentry);
+		mntput(exp->ex_mnt);
+		auth_domain_put(exp->ex_client);
+		kfree(exp);
+	}
+}
+
+static void svc_export_request(struct cache_detail *cd,
+			       struct cache_head *h,
+			       char **bpp, int *blen)
+{
+	/*  client path */
+	struct svc_export *exp = container_of(h, struct svc_export, h);
+	char *pth;
+
+	qword_add(bpp, blen, exp->ex_client->name);
+	pth = d_path(exp->ex_dentry, exp->ex_mnt, *bpp, *blen);
+	if (IS_ERR(pth)) {
+		/* is this correct? */
+		(*bpp)[0] = '\n';
+		return;
+	}
+	qword_add(bpp, blen, pth);
+	(*bpp)[-1] = '\n';
+}
+
+static struct svc_export *svc_export_lookup(struct svc_export *, int);
+
+static int check_export(struct inode *inode, int flags)
+{
+
+	/* We currently export only dirs and regular files.
+	 * This is what umountd does.
+	 */
+	if (!S_ISDIR(inode->i_mode) &&
+	    !S_ISREG(inode->i_mode))
+		return -ENOTDIR;
+
+	/* There are two requirements on a filesystem to be exportable.
+	 * 1:  We must be able to identify the filesystem from a number.
+	 *       either a device number (so FS_REQUIRES_DEV needed)
+	 *       or an FSID number (so NFSEXP_FSID needed).
+	 * 2:  We must be able to find an inode from a filehandle.
+	 *       This means that s_export_op must be set.
+	 */
+	if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) &&
+	    !(flags & NFSEXP_FSID)) {
+		dprintk("exp_export: export of non-dev fs without fsid");
+		return -EINVAL;
+	}
+	if (!inode->i_sb->s_export_op) {
+		dprintk("exp_export: export of invalid fs type.\n");
+		return -EINVAL;
+	}
+
+	/* Ok, we can export it */;
+	if (!inode->i_sb->s_export_op->find_exported_dentry)
+		inode->i_sb->s_export_op->find_exported_dentry =
+			find_exported_dentry;
+	return 0;
+
+}
+
+static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
+{
+	/* client path expiry [flags anonuid anongid fsid] */
+	char *buf;
+	int len;
+	int err;
+	struct auth_domain *dom = NULL;
+	struct nameidata nd;
+	struct svc_export exp, *expp;
+	int an_int;
+
+	nd.dentry = NULL;
+
+	if (mesg[mlen-1] != '\n')
+		return -EINVAL;
+	mesg[mlen-1] = 0;
+
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	err = -ENOMEM;
+	if (!buf) goto out;
+
+	/* client */
+	len = qword_get(&mesg, buf, PAGE_SIZE);
+	err = -EINVAL;
+	if (len <= 0) goto out;
+
+	err = -ENOENT;
+	dom = auth_domain_find(buf);
+	if (!dom)
+		goto out;
+
+	/* path */
+	err = -EINVAL;
+	if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+		goto out;
+	err = path_lookup(buf, 0, &nd);
+	if (err) goto out;
+
+	exp.h.flags = 0;
+	exp.ex_client = dom;
+	exp.ex_mnt = nd.mnt;
+	exp.ex_dentry = nd.dentry;
+
+	/* expiry */
+	err = -EINVAL;
+	exp.h.expiry_time = get_expiry(&mesg);
+	if (exp.h.expiry_time == 0)
+		goto out;
+
+	/* flags */
+	err = get_int(&mesg, &an_int);
+	if (err == -ENOENT)
+		set_bit(CACHE_NEGATIVE, &exp.h.flags);
+	else {
+		if (err || an_int < 0) goto out;	
+		exp.ex_flags= an_int;
+	
+		/* anon uid */
+		err = get_int(&mesg, &an_int);
+		if (err) goto out;
+		exp.ex_anon_uid= an_int;
+
+		/* anon gid */
+		err = get_int(&mesg, &an_int);
+		if (err) goto out;
+		exp.ex_anon_gid= an_int;
+
+		/* fsid */
+		err = get_int(&mesg, &an_int);
+		if (err) goto out;
+		exp.ex_fsid = an_int;
+
+		err = check_export(nd.dentry->d_inode, exp.ex_flags);
+		if (err) goto out;
+	}
+
+	expp = svc_export_lookup(&exp, 1);
+	if (expp)
+		exp_put(expp);
+	err = 0;
+	cache_flush();
+ out:
+	if (nd.dentry)
+		path_release(&nd);
+	if (dom)
+		auth_domain_put(dom);
+	if (buf)
+		kfree(buf);
+	return err;
+}
+
+static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong);
+
+static int svc_export_show(struct seq_file *m,
+			   struct cache_detail *cd,
+			   struct cache_head *h)
+{
+	struct svc_export *exp ;
+
+	if (h ==NULL) {
+		seq_puts(m, "#path domain(flags)\n");
+		return 0;
+	}
+	exp = container_of(h, struct svc_export, h);
+	seq_path(m, exp->ex_mnt, exp->ex_dentry, " \t\n\\");
+	seq_putc(m, '\t');
+	seq_escape(m, exp->ex_client->name, " \t\n\\");
+	seq_putc(m, '(');
+	if (test_bit(CACHE_VALID, &h->flags) && 
+	    !test_bit(CACHE_NEGATIVE, &h->flags))
+		exp_flags(m, exp->ex_flags, exp->ex_fsid, 
+			  exp->ex_anon_uid, exp->ex_anon_gid);
+	seq_puts(m, ")\n");
+	return 0;
+}
+struct cache_detail svc_export_cache = {
+	.hash_size	= EXPORT_HASHMAX,
+	.hash_table	= export_table,
+	.name		= "nfsd.export",
+	.cache_put	= svc_export_put,
+	.cache_request	= svc_export_request,
+	.cache_parse	= svc_export_parse,
+	.cache_show	= svc_export_show,
+};
+
+static inline int svc_export_match(struct svc_export *a, struct svc_export *b)
+{
+	return a->ex_client == b->ex_client &&
+		a->ex_dentry == b->ex_dentry &&
+		a->ex_mnt == b->ex_mnt;
+}
+static inline void svc_export_init(struct svc_export *new, struct svc_export *item)
+{
+	cache_get(&item->ex_client->h);
+	new->ex_client = item->ex_client;
+	new->ex_dentry = dget(item->ex_dentry);
+	new->ex_mnt = mntget(item->ex_mnt);
+}
+
+static inline void svc_export_update(struct svc_export *new, struct svc_export *item)
+{
+	new->ex_flags = item->ex_flags;
+	new->ex_anon_uid = item->ex_anon_uid;
+	new->ex_anon_gid = item->ex_anon_gid;
+	new->ex_fsid = item->ex_fsid;
+}
+
+static DefineSimpleCacheLookup(svc_export,1) /* allow inplace updates */
+
+
+struct svc_expkey *
+exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp)
+{
+	struct svc_expkey key, *ek;
+	int err;
+	
+	if (!clp)
+		return NULL;
+
+	key.ek_client = clp;
+	key.ek_fsidtype = fsid_type;
+	memcpy(key.ek_fsid, fsidv, key_len(fsid_type));
+
+	ek = svc_expkey_lookup(&key, 0);
+	if (ek != NULL)
+		if ((err = cache_check(&svc_expkey_cache, &ek->h, reqp)))
+			ek = ERR_PTR(err);
+	return ek;
+}
+
+static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv,
+		       struct svc_export *exp)
+{
+	struct svc_expkey key, *ek;
+
+	key.ek_client = clp;
+	key.ek_fsidtype = fsid_type;
+	memcpy(key.ek_fsid, fsidv, key_len(fsid_type));
+	key.ek_export = exp;
+	key.h.expiry_time = NEVER;
+	key.h.flags = 0;
+
+	ek = svc_expkey_lookup(&key, 1);
+	if (ek) {
+		expkey_put(&ek->h, &svc_expkey_cache);
+		return 0;
+	}
+	return -ENOMEM;
+}
+
+/*
+ * Find the client's export entry matching xdev/xino.
+ */
+static inline struct svc_expkey *
+exp_get_key(svc_client *clp, dev_t dev, ino_t ino)
+{
+	u32 fsidv[3];
+	
+	if (old_valid_dev(dev)) {
+		mk_fsid_v0(fsidv, dev, ino);
+		return exp_find_key(clp, 0, fsidv, NULL);
+	}
+	mk_fsid_v3(fsidv, dev, ino);
+	return exp_find_key(clp, 3, fsidv, NULL);
+}
+
+/*
+ * Find the client's export entry matching fsid
+ */
+static inline struct svc_expkey *
+exp_get_fsid_key(svc_client *clp, int fsid)
+{
+	u32 fsidv[2];
+
+	mk_fsid_v1(fsidv, fsid);
+
+	return exp_find_key(clp, 1, fsidv, NULL);
+}
+
+svc_export *
+exp_get_by_name(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry,
+		struct cache_req *reqp)
+{
+	struct svc_export *exp, key;
+	
+	if (!clp)
+		return NULL;
+
+	key.ex_client = clp;
+	key.ex_mnt = mnt;
+	key.ex_dentry = dentry;
+
+	exp = svc_export_lookup(&key, 0);
+	if (exp != NULL) 
+		switch (cache_check(&svc_export_cache, &exp->h, reqp)) {
+		case 0: break;
+		case -EAGAIN:
+			exp = ERR_PTR(-EAGAIN);
+			break;
+		default:
+			exp = NULL;
+		}
+
+	return exp;
+}
+
+/*
+ * Find the export entry for a given dentry.
+ */
+struct svc_export *
+exp_parent(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry,
+	   struct cache_req *reqp)
+{
+	svc_export *exp;
+
+	dget(dentry);
+	exp = exp_get_by_name(clp, mnt, dentry, reqp);
+
+	while (exp == NULL && !IS_ROOT(dentry)) {
+		struct dentry *parent;
+
+		parent = dget_parent(dentry);
+		dput(dentry);
+		dentry = parent;
+		exp = exp_get_by_name(clp, mnt, dentry, reqp);
+	}
+	dput(dentry);
+	return exp;
+}
+
+/*
+ * Hashtable locking. Write locks are placed only by user processes
+ * wanting to modify export information.
+ * Write locking only done in this file.  Read locking
+ * needed externally.
+ */
+
+static DECLARE_RWSEM(hash_sem);
+
+void
+exp_readlock(void)
+{
+	down_read(&hash_sem);
+}
+
+static inline void
+exp_writelock(void)
+{
+	down_write(&hash_sem);
+}
+
+void
+exp_readunlock(void)
+{
+	up_read(&hash_sem);
+}
+
+static inline void
+exp_writeunlock(void)
+{
+	up_write(&hash_sem);
+}
+
+static void exp_fsid_unhash(struct svc_export *exp)
+{
+	struct svc_expkey *ek;
+
+	if ((exp->ex_flags & NFSEXP_FSID) == 0)
+		return;
+
+	ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid);
+	if (ek && !IS_ERR(ek)) {
+		ek->h.expiry_time = get_seconds()-1;
+		expkey_put(&ek->h, &svc_expkey_cache);
+	}
+	svc_expkey_cache.nextcheck = get_seconds();
+}
+
+static int exp_fsid_hash(svc_client *clp, struct svc_export *exp)
+{
+	u32 fsid[2];
+ 
+	if ((exp->ex_flags & NFSEXP_FSID) == 0)
+		return 0;
+
+	mk_fsid_v1(fsid, exp->ex_fsid);
+	return exp_set_key(clp, 1, fsid, exp);
+}
+
+static int exp_hash(struct auth_domain *clp, struct svc_export *exp)
+{
+	u32 fsid[2];
+	struct inode *inode = exp->ex_dentry->d_inode;
+	dev_t dev = inode->i_sb->s_dev;
+
+	if (old_valid_dev(dev)) {
+		mk_fsid_v0(fsid, dev, inode->i_ino);
+		return exp_set_key(clp, 0, fsid, exp);
+	}
+	mk_fsid_v3(fsid, dev, inode->i_ino);
+	return exp_set_key(clp, 3, fsid, exp);
+}
+
+static void exp_unhash(struct svc_export *exp)
+{
+	struct svc_expkey *ek;
+	struct inode *inode = exp->ex_dentry->d_inode;
+
+	ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino);
+	if (ek && !IS_ERR(ek)) {
+		ek->h.expiry_time = get_seconds()-1;
+		expkey_put(&ek->h, &svc_expkey_cache);
+	}
+	svc_expkey_cache.nextcheck = get_seconds();
+}
+	
+/*
+ * Export a file system.
+ */
+int
+exp_export(struct nfsctl_export *nxp)
+{
+	svc_client	*clp;
+	struct svc_export	*exp = NULL;
+	struct svc_export	new;
+	struct svc_expkey	*fsid_key = NULL;
+	struct nameidata nd;
+	int		err;
+
+	/* Consistency check */
+	err = -EINVAL;
+	if (!exp_verify_string(nxp->ex_path, NFS_MAXPATHLEN) ||
+	    !exp_verify_string(nxp->ex_client, NFSCLNT_IDMAX))
+		goto out;
+
+	dprintk("exp_export called for %s:%s (%x/%ld fl %x).\n",
+			nxp->ex_client, nxp->ex_path,
+			(unsigned)nxp->ex_dev, (long)nxp->ex_ino,
+			nxp->ex_flags);
+
+	/* Try to lock the export table for update */
+	exp_writelock();
+
+	/* Look up client info */
+	if (!(clp = auth_domain_find(nxp->ex_client)))
+		goto out_unlock;
+
+
+	/* Look up the dentry */
+	err = path_lookup(nxp->ex_path, 0, &nd);
+	if (err)
+		goto out_unlock;
+	err = -EINVAL;
+
+	exp = exp_get_by_name(clp, nd.mnt, nd.dentry, NULL);
+
+	/* must make sure there won't be an ex_fsid clash */
+	if ((nxp->ex_flags & NFSEXP_FSID) &&
+	    (fsid_key = exp_get_fsid_key(clp, nxp->ex_dev)) &&
+	    !IS_ERR(fsid_key) &&
+	    fsid_key->ek_export &&
+	    fsid_key->ek_export != exp)
+		goto finish;
+
+	if (exp) {
+		/* just a flags/id/fsid update */
+
+		exp_fsid_unhash(exp);
+		exp->ex_flags    = nxp->ex_flags;
+		exp->ex_anon_uid = nxp->ex_anon_uid;
+		exp->ex_anon_gid = nxp->ex_anon_gid;
+		exp->ex_fsid     = nxp->ex_dev;
+
+		err = exp_fsid_hash(clp, exp);
+		goto finish;
+	}
+
+	err = check_export(nd.dentry->d_inode, nxp->ex_flags);
+	if (err) goto finish;
+
+	err = -ENOMEM;
+
+	dprintk("nfsd: creating export entry %p for client %p\n", exp, clp);
+
+	new.h.expiry_time = NEVER;
+	new.h.flags = 0;
+	new.ex_client = clp;
+	new.ex_mnt = nd.mnt;
+	new.ex_dentry = nd.dentry;
+	new.ex_flags = nxp->ex_flags;
+	new.ex_anon_uid = nxp->ex_anon_uid;
+	new.ex_anon_gid = nxp->ex_anon_gid;
+	new.ex_fsid = nxp->ex_dev;
+
+	exp = svc_export_lookup(&new, 1);
+
+	if (exp == NULL)
+		goto finish;
+
+	err = 0;
+
+	if (exp_hash(clp, exp) ||
+	    exp_fsid_hash(clp, exp)) {
+		/* failed to create at least one index */
+		exp_do_unexport(exp);
+		cache_flush();
+		err = -ENOMEM;
+	}
+
+finish:
+	if (exp)
+		exp_put(exp);
+	if (fsid_key && !IS_ERR(fsid_key))
+		expkey_put(&fsid_key->h, &svc_expkey_cache);
+	if (clp)
+		auth_domain_put(clp);
+	path_release(&nd);
+out_unlock:
+	exp_writeunlock();
+out:
+	return err;
+}
+
+/*
+ * Unexport a file system. The export entry has already
+ * been removed from the client's list of exported fs's.
+ */
+static void
+exp_do_unexport(svc_export *unexp)
+{
+	unexp->h.expiry_time = get_seconds()-1;
+	svc_export_cache.nextcheck = get_seconds();
+	exp_unhash(unexp);
+	exp_fsid_unhash(unexp);
+}
+
+
+/*
+ * unexport syscall.
+ */
+int
+exp_unexport(struct nfsctl_export *nxp)
+{
+	struct auth_domain *dom;
+	svc_export *exp;
+	struct nameidata nd;
+	int		err;
+
+	/* Consistency check */
+	if (!exp_verify_string(nxp->ex_path, NFS_MAXPATHLEN) ||
+	    !exp_verify_string(nxp->ex_client, NFSCLNT_IDMAX))
+		return -EINVAL;
+
+	exp_writelock();
+
+	err = -EINVAL;
+	dom = auth_domain_find(nxp->ex_client);
+	if (!dom) {
+		dprintk("nfsd: unexport couldn't find %s\n", nxp->ex_client);
+		goto out_unlock;
+	}
+
+	err = path_lookup(nxp->ex_path, 0, &nd);
+	if (err)
+		goto out_domain;
+
+	err = -EINVAL;
+	exp = exp_get_by_name(dom, nd.mnt, nd.dentry, NULL);
+	path_release(&nd);
+	if (!exp)
+		goto out_domain;
+
+	exp_do_unexport(exp);
+	exp_put(exp);
+	err = 0;
+
+out_domain:
+	auth_domain_put(dom);
+	cache_flush();
+out_unlock:
+	exp_writeunlock();
+	return err;
+}
+
+/*
+ * Obtain the root fh on behalf of a client.
+ * This could be done in user space, but I feel that it adds some safety
+ * since its harder to fool a kernel module than a user space program.
+ */
+int
+exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize)
+{
+	struct svc_export	*exp;
+	struct nameidata	nd;
+	struct inode		*inode;
+	struct svc_fh		fh;
+	int			err;
+
+	err = -EPERM;
+	/* NB: we probably ought to check that it's NUL-terminated */
+	if (path_lookup(path, 0, &nd)) {
+		printk("nfsd: exp_rootfh path not found %s", path);
+		return err;
+	}
+	inode = nd.dentry->d_inode;
+
+	dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n",
+		 path, nd.dentry, clp->name,
+		 inode->i_sb->s_id, inode->i_ino);
+	exp = exp_parent(clp, nd.mnt, nd.dentry, NULL);
+	if (!exp) {
+		dprintk("nfsd: exp_rootfh export not found.\n");
+		goto out;
+	}
+
+	/*
+	 * fh must be initialized before calling fh_compose
+	 */
+	fh_init(&fh, maxsize);
+	if (fh_compose(&fh, exp, nd.dentry, NULL))
+		err = -EINVAL;
+	else
+		err = 0;
+	memcpy(f, &fh.fh_handle, sizeof(struct knfsd_fh));
+	fh_put(&fh);
+	exp_put(exp);
+out:
+	path_release(&nd);
+	return err;
+}
+
+/*
+ * Called when we need the filehandle for the root of the pseudofs,
+ * for a given NFSv4 client.   The root is defined to be the
+ * export point with fsid==0
+ */
+int
+exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
+	       struct cache_req *creq)
+{
+	struct svc_expkey *fsid_key;
+	int rv;
+	u32 fsidv[2];
+
+	mk_fsid_v1(fsidv, 0);
+
+	fsid_key = exp_find_key(clp, 1, fsidv, creq);
+	if (IS_ERR(fsid_key) && PTR_ERR(fsid_key) == -EAGAIN)
+		return nfserr_dropit;
+	if (!fsid_key || IS_ERR(fsid_key))
+		return nfserr_perm;
+
+	rv = fh_compose(fhp, fsid_key->ek_export, 
+			  fsid_key->ek_export->ex_dentry, NULL);
+	expkey_put(&fsid_key->h, &svc_expkey_cache);
+	return rv;
+}
+
+/* Iterator */
+
+static void *e_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t n = *pos;
+	unsigned hash, export;
+	struct cache_head *ch;
+	
+	exp_readlock();
+	read_lock(&svc_export_cache.hash_lock);
+	if (!n--)
+		return (void *)1;
+	hash = n >> 32;
+	export = n & ((1LL<<32) - 1);
+
+	
+	for (ch=export_table[hash]; ch; ch=ch->next)
+		if (!export--)
+			return ch;
+	n &= ~((1LL<<32) - 1);
+	do {
+		hash++;
+		n += 1LL<<32;
+	} while(hash < EXPORT_HASHMAX && export_table[hash]==NULL);
+	if (hash >= EXPORT_HASHMAX)
+		return NULL;
+	*pos = n+1;
+	return export_table[hash];
+}
+
+static void *e_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	struct cache_head *ch = p;
+	int hash = (*pos >> 32);
+
+	if (p == (void *)1)
+		hash = 0;
+	else if (ch->next == NULL) {
+		hash++;
+		*pos += 1LL<<32;
+	} else {
+		++*pos;
+		return ch->next;
+	}
+	*pos &= ~((1LL<<32) - 1);
+	while (hash < EXPORT_HASHMAX && export_table[hash] == NULL) {
+		hash++;
+		*pos += 1LL<<32;
+	}
+	if (hash >= EXPORT_HASHMAX)
+		return NULL;
+	++*pos;
+	return export_table[hash];
+}
+
+static void e_stop(struct seq_file *m, void *p)
+{
+	read_unlock(&svc_export_cache.hash_lock);
+	exp_readunlock();
+}
+
+static struct flags {
+	int flag;
+	char *name[2];
+} expflags[] = {
+	{ NFSEXP_READONLY, {"ro", "rw"}},
+	{ NFSEXP_INSECURE_PORT, {"insecure", ""}},
+	{ NFSEXP_ROOTSQUASH, {"root_squash", "no_root_squash"}},
+	{ NFSEXP_ALLSQUASH, {"all_squash", ""}},
+	{ NFSEXP_ASYNC, {"async", "sync"}},
+	{ NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}},
+	{ NFSEXP_NOHIDE, {"nohide", ""}},
+	{ NFSEXP_CROSSMOUNT, {"crossmnt", ""}},
+	{ NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
+	{ NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
+#ifdef MSNFS
+	{ NFSEXP_MSNFS, {"msnfs", ""}},
+#endif
+	{ 0, {"", ""}}
+};
+
+static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong)
+{
+	int first = 0;
+	struct flags *flg;
+
+	for (flg = expflags; flg->flag; flg++) {
+		int state = (flg->flag & flag)?0:1;
+		if (*flg->name[state])
+			seq_printf(m, "%s%s", first++?",":"", flg->name[state]);
+	}
+	if (flag & NFSEXP_FSID)
+		seq_printf(m, "%sfsid=%d", first++?",":"", fsid);
+	if (anonu != (uid_t)-2 && anonu != (0x10000-2))
+		seq_printf(m, "%sanonuid=%d", first++?",":"", anonu);
+	if (anong != (gid_t)-2 && anong != (0x10000-2))
+		seq_printf(m, "%sanongid=%d", first++?",":"", anong);
+}
+
+static int e_show(struct seq_file *m, void *p)
+{
+	struct cache_head *cp = p;
+	struct svc_export *exp = container_of(cp, struct svc_export, h);
+	svc_client *clp;
+
+	if (p == (void *)1) {
+		seq_puts(m, "# Version 1.1\n");
+		seq_puts(m, "# Path Client(Flags) # IPs\n");
+		return 0;
+	}
+
+	clp = exp->ex_client;
+	cache_get(&exp->h);
+	if (cache_check(&svc_export_cache, &exp->h, NULL))
+		return 0;
+	if (cache_put(&exp->h, &svc_export_cache)) BUG();
+	return svc_export_show(m, &svc_export_cache, cp);
+}
+
+struct seq_operations nfs_exports_op = {
+	.start	= e_start,
+	.next	= e_next,
+	.stop	= e_stop,
+	.show	= e_show,
+};
+
+/*
+ * Add or modify a client.
+ * Change requests may involve the list of host addresses. The list of
+ * exports and possibly existing uid maps are left untouched.
+ */
+int
+exp_addclient(struct nfsctl_client *ncp)
+{
+	struct auth_domain	*dom;
+	int			i, err;
+
+	/* First, consistency check. */
+	err = -EINVAL;
+	if (! exp_verify_string(ncp->cl_ident, NFSCLNT_IDMAX))
+		goto out;
+	if (ncp->cl_naddr > NFSCLNT_ADDRMAX)
+		goto out;
+
+	/* Lock the hashtable */
+	exp_writelock();
+
+	dom = unix_domain_find(ncp->cl_ident);
+
+	err = -ENOMEM;
+	if (!dom)
+		goto out_unlock;
+
+	/* Insert client into hashtable. */
+	for (i = 0; i < ncp->cl_naddr; i++)
+		auth_unix_add_addr(ncp->cl_addrlist[i], dom);
+
+	auth_unix_forget_old(dom);
+	auth_domain_put(dom);
+
+	err = 0;
+
+out_unlock:
+	exp_writeunlock();
+out:
+	return err;
+}
+
+/*
+ * Delete a client given an identifier.
+ */
+int
+exp_delclient(struct nfsctl_client *ncp)
+{
+	int		err;
+	struct auth_domain *dom;
+
+	err = -EINVAL;
+	if (!exp_verify_string(ncp->cl_ident, NFSCLNT_IDMAX))
+		goto out;
+
+	/* Lock the hashtable */
+	exp_writelock();
+
+	dom = auth_domain_find(ncp->cl_ident);
+	/* just make sure that no addresses work 
+	 * and that it will expire soon 
+	 */
+	if (dom) {
+		err = auth_unix_forget_old(dom);
+		dom->h.expiry_time = get_seconds();
+		auth_domain_put(dom);
+	}
+
+	exp_writeunlock();
+out:
+	return err;
+}
+
+/*
+ * Verify that string is non-empty and does not exceed max length.
+ */
+static int
+exp_verify_string(char *cp, int max)
+{
+	int	i;
+
+	for (i = 0; i < max; i++)
+		if (!cp[i])
+			return i;
+	cp[i] = 0;
+	printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp);
+	return 0;
+}
+
+/*
+ * Initialize the exports module.
+ */
+void
+nfsd_export_init(void)
+{
+	dprintk("nfsd: initializing export module.\n");
+
+	cache_register(&svc_export_cache);
+	cache_register(&svc_expkey_cache);
+
+}
+
+/*
+ * Flush exports table - called when last nfsd thread is killed
+ */
+void
+nfsd_export_flush(void)
+{
+	exp_writelock();
+	cache_purge(&svc_expkey_cache);
+	cache_purge(&svc_export_cache);
+	exp_writeunlock();
+}
+
+/*
+ * Shutdown the exports module.
+ */
+void
+nfsd_export_shutdown(void)
+{
+
+	dprintk("nfsd: shutting down export module.\n");
+
+	exp_writelock();
+
+	if (cache_unregister(&svc_expkey_cache))
+		printk(KERN_ERR "nfsd: failed to unregister expkey cache\n");
+	if (cache_unregister(&svc_export_cache))
+		printk(KERN_ERR "nfsd: failed to unregister export cache\n");
+	svcauth_unix_purge();
+
+	exp_writeunlock();
+	dprintk("nfsd: export shutdown complete.\n");
+}
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
new file mode 100644
index 000000000000..7b889ff15ae6
--- /dev/null
+++ b/fs/nfsd/lockd.c
@@ -0,0 +1,79 @@
+/*
+ * linux/fs/nfsd/lockd.c
+ *
+ * This file contains all the stubs needed when communicating with lockd.
+ * This level of indirection is necessary so we can run nfsd+lockd without
+ * requiring the nfs client to be compiled in/loaded, and vice versa.
+ *
+ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/lockd/bind.h>
+
+#define NFSDDBG_FACILITY		NFSDDBG_LOCKD
+
+/*
+ * Note: we hold the dentry use count while the file is open.
+ */
+static u32
+nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
+{
+	u32		nfserr;
+	struct svc_fh	fh;
+
+	/* must initialize before using! but maxsize doesn't matter */
+	fh_init(&fh,0);
+	fh.fh_handle.fh_size = f->size;
+	memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size);
+	fh.fh_export = NULL;
+
+	exp_readlock();
+	nfserr = nfsd_open(rqstp, &fh, S_IFREG, MAY_LOCK, filp);
+	fh_put(&fh);
+	rqstp->rq_client = NULL;
+	exp_readunlock();
+ 	/* nlm and nfsd don't share error codes.
+	 * we invent: 0 = no error
+	 *            1 = stale file handle
+	 *	      2 = other error
+	 */
+	switch (nfserr) {
+	case nfs_ok:
+		return 0;
+	case nfserr_stale:
+		return 1;
+	default:
+		return 2;
+	}
+}
+
+static void
+nlm_fclose(struct file *filp)
+{
+	fput(filp);
+}
+
+static struct nlmsvc_binding	nfsd_nlm_ops = {
+	.fopen		= nlm_fopen,		/* open file for locking */
+	.fclose		= nlm_fclose,		/* close file */
+};
+
+void
+nfsd_lockd_init(void)
+{
+	dprintk("nfsd: initializing lockd\n");
+	nlmsvc_ops = &nfsd_nlm_ops;
+}
+
+void
+nfsd_lockd_shutdown(void)
+{
+	nlmsvc_ops = NULL;
+}
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
new file mode 100644
index 000000000000..041380fe667b
--- /dev/null
+++ b/fs/nfsd/nfs3proc.c
@@ -0,0 +1,702 @@
+/*
+ * linux/fs/nfsd/nfs3proc.c
+ *
+ * Process version 3 NFS requests.
+ *
+ * Copyright (C) 1996, 1997, 1998 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/linkage.h>
+#include <linux/time.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/net.h>
+#include <linux/in.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/major.h>
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfsd/xdr3.h>
+#include <linux/nfs3.h>
+
+#define NFSDDBG_FACILITY		NFSDDBG_PROC
+
+#define RETURN_STATUS(st)	{ resp->status = (st); return (st); }
+
+static int	nfs3_ftypes[] = {
+	0,			/* NF3NON */
+	S_IFREG,		/* NF3REG */
+	S_IFDIR,		/* NF3DIR */
+	S_IFBLK,		/* NF3BLK */
+	S_IFCHR,		/* NF3CHR */
+	S_IFLNK,		/* NF3LNK */
+	S_IFSOCK,		/* NF3SOCK */
+	S_IFIFO,		/* NF3FIFO */
+};
+
+/*
+ * NULL call.
+ */
+static int
+nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+	return nfs_ok;
+}
+
+/*
+ * Get a file's attributes
+ */
+static int
+nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
+					   struct nfsd3_attrstat *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: GETATTR(3)  %s\n",
+				SVCFH_fmt(&argp->fh));
+
+	fh_copy(&resp->fh, &argp->fh);
+	nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Set a file's attributes
+ */
+static int
+nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp,
+					   struct nfsd3_attrstat  *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: SETATTR(3)  %s\n",
+				SVCFH_fmt(&argp->fh));
+
+	fh_copy(&resp->fh, &argp->fh);
+	nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs,
+			      argp->check_guard, argp->guardtime);
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Look up a path name component
+ */
+static int
+nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
+					  struct nfsd3_diropres  *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: LOOKUP(3)   %s %.*s\n",
+				SVCFH_fmt(&argp->fh),
+				argp->len,
+				argp->name);
+
+	fh_copy(&resp->dirfh, &argp->fh);
+	fh_init(&resp->fh, NFS3_FHSIZE);
+
+	nfserr = nfsd_lookup(rqstp, &resp->dirfh,
+				    argp->name,
+				    argp->len,
+				    &resp->fh);
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Check file access
+ */
+static int
+nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
+					  struct nfsd3_accessres *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: ACCESS(3)   %s 0x%x\n",
+				SVCFH_fmt(&argp->fh),
+				argp->access);
+
+	fh_copy(&resp->fh, &argp->fh);
+	resp->access = argp->access;
+	nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL);
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Read a symlink.
+ */
+static int
+nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp,
+					   struct nfsd3_readlinkres *resp)
+{
+	int nfserr;
+
+	dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh));
+
+	/* Read the symlink. */
+	fh_copy(&resp->fh, &argp->fh);
+	resp->len = NFS3_MAXPATHLEN;
+	nfserr = nfsd_readlink(rqstp, &resp->fh, argp->buffer, &resp->len);
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Read a portion of a file.
+ */
+static int
+nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
+				        struct nfsd3_readres  *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: READ(3) %s %lu bytes at %lu\n",
+				SVCFH_fmt(&argp->fh),
+				(unsigned long) argp->count,
+				(unsigned long) argp->offset);
+
+	/* Obtain buffer pointer for payload.
+	 * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
+	 * + 1 (xdr opaque byte count) = 26
+	 */
+
+	resp->count = argp->count;
+	if (NFSSVC_MAXBLKSIZE < resp->count)
+		resp->count = NFSSVC_MAXBLKSIZE;
+
+	svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
+
+	fh_copy(&resp->fh, &argp->fh);
+	nfserr = nfsd_read(rqstp, &resp->fh, NULL,
+				  argp->offset,
+			   	  argp->vec, argp->vlen,
+				  &resp->count);
+	if (nfserr == 0) {
+		struct inode	*inode = resp->fh.fh_dentry->d_inode;
+
+		resp->eof = (argp->offset + resp->count) >= inode->i_size;
+	}
+
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Write data to a file
+ */
+static int
+nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
+					 struct nfsd3_writeres  *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: WRITE(3)    %s %d bytes at %ld%s\n",
+				SVCFH_fmt(&argp->fh),
+				argp->len,
+				(unsigned long) argp->offset,
+				argp->stable? " stable" : "");
+
+	fh_copy(&resp->fh, &argp->fh);
+	resp->committed = argp->stable;
+	nfserr = nfsd_write(rqstp, &resp->fh, NULL,
+				   argp->offset,
+				   argp->vec, argp->vlen,
+				   argp->len,
+				   &resp->committed);
+	resp->count = argp->count;
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * With NFSv3, CREATE processing is a lot easier than with NFSv2.
+ * At least in theory; we'll see how it fares in practice when the
+ * first reports about SunOS compatibility problems start to pour in...
+ */
+static int
+nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
+					  struct nfsd3_diropres   *resp)
+{
+	svc_fh		*dirfhp, *newfhp = NULL;
+	struct iattr	*attr;
+	u32		nfserr;
+
+	dprintk("nfsd: CREATE(3)   %s %.*s\n",
+				SVCFH_fmt(&argp->fh),
+				argp->len,
+				argp->name);
+
+	dirfhp = fh_copy(&resp->dirfh, &argp->fh);
+	newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
+	attr   = &argp->attrs;
+
+	/* Get the directory inode */
+	nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_CREATE);
+	if (nfserr)
+		RETURN_STATUS(nfserr);
+
+	/* Unfudge the mode bits */
+	attr->ia_mode &= ~S_IFMT;
+	if (!(attr->ia_valid & ATTR_MODE)) { 
+		attr->ia_valid |= ATTR_MODE;
+		attr->ia_mode = S_IFREG;
+	} else {
+		attr->ia_mode = (attr->ia_mode & ~S_IFMT) | S_IFREG;
+	}
+
+	/* Now create the file and set attributes */
+	nfserr = nfsd_create_v3(rqstp, dirfhp, argp->name, argp->len,
+				attr, newfhp,
+				argp->createmode, argp->verf, NULL);
+
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Make directory. This operation is not idempotent.
+ */
+static int
+nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
+					 struct nfsd3_diropres   *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: MKDIR(3)    %s %.*s\n",
+				SVCFH_fmt(&argp->fh),
+				argp->len,
+				argp->name);
+
+	argp->attrs.ia_valid &= ~ATTR_SIZE;
+	fh_copy(&resp->dirfh, &argp->fh);
+	fh_init(&resp->fh, NFS3_FHSIZE);
+	nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len,
+				    &argp->attrs, S_IFDIR, 0, &resp->fh);
+
+	RETURN_STATUS(nfserr);
+}
+
+static int
+nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
+					   struct nfsd3_diropres    *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: SYMLINK(3)  %s %.*s -> %.*s\n",
+				SVCFH_fmt(&argp->ffh),
+				argp->flen, argp->fname,
+				argp->tlen, argp->tname);
+
+	fh_copy(&resp->dirfh, &argp->ffh);
+	fh_init(&resp->fh, NFS3_FHSIZE);
+	nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen,
+						   argp->tname, argp->tlen,
+						   &resp->fh, &argp->attrs);
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Make socket/fifo/device.
+ */
+static int
+nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp,
+					 struct nfsd3_diropres  *resp)
+{
+	int	nfserr, type;
+	dev_t	rdev = 0;
+
+	dprintk("nfsd: MKNOD(3)    %s %.*s\n",
+				SVCFH_fmt(&argp->fh),
+				argp->len,
+				argp->name);
+
+	fh_copy(&resp->dirfh, &argp->fh);
+	fh_init(&resp->fh, NFS3_FHSIZE);
+
+	if (argp->ftype == 0 || argp->ftype >= NF3BAD)
+		RETURN_STATUS(nfserr_inval);
+	if (argp->ftype == NF3CHR || argp->ftype == NF3BLK) {
+		rdev = MKDEV(argp->major, argp->minor);
+		if (MAJOR(rdev) != argp->major ||
+		    MINOR(rdev) != argp->minor)
+			RETURN_STATUS(nfserr_inval);
+	} else
+		if (argp->ftype != NF3SOCK && argp->ftype != NF3FIFO)
+			RETURN_STATUS(nfserr_inval);
+
+	type = nfs3_ftypes[argp->ftype];
+	nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len,
+				    &argp->attrs, type, rdev, &resp->fh);
+
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Remove file/fifo/socket etc.
+ */
+static int
+nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
+					  struct nfsd3_attrstat  *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: REMOVE(3)   %s %.*s\n",
+				SVCFH_fmt(&argp->fh),
+				argp->len,
+				argp->name);
+
+	/* Unlink. -S_IFDIR means file must not be a directory */
+	fh_copy(&resp->fh, &argp->fh);
+	nfserr = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, argp->name, argp->len);
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Remove a directory
+ */
+static int
+nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
+					 struct nfsd3_attrstat  *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: RMDIR(3)    %s %.*s\n",
+				SVCFH_fmt(&argp->fh),
+				argp->len,
+				argp->name);
+
+	fh_copy(&resp->fh, &argp->fh);
+	nfserr = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len);
+	RETURN_STATUS(nfserr);
+}
+
+static int
+nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp,
+					  struct nfsd3_renameres  *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: RENAME(3)   %s %.*s ->\n",
+				SVCFH_fmt(&argp->ffh),
+				argp->flen,
+				argp->fname);
+	dprintk("nfsd: -> %s %.*s\n",
+				SVCFH_fmt(&argp->tfh),
+				argp->tlen,
+				argp->tname);
+
+	fh_copy(&resp->ffh, &argp->ffh);
+	fh_copy(&resp->tfh, &argp->tfh);
+	nfserr = nfsd_rename(rqstp, &resp->ffh, argp->fname, argp->flen,
+				    &resp->tfh, argp->tname, argp->tlen);
+	RETURN_STATUS(nfserr);
+}
+
+static int
+nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp,
+					struct nfsd3_linkres  *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: LINK(3)     %s ->\n",
+				SVCFH_fmt(&argp->ffh));
+	dprintk("nfsd:   -> %s %.*s\n",
+				SVCFH_fmt(&argp->tfh),
+				argp->tlen,
+				argp->tname);
+
+	fh_copy(&resp->fh,  &argp->ffh);
+	fh_copy(&resp->tfh, &argp->tfh);
+	nfserr = nfsd_link(rqstp, &resp->tfh, argp->tname, argp->tlen,
+				  &resp->fh);
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Read a portion of a directory.
+ */
+static int
+nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
+					   struct nfsd3_readdirres  *resp)
+{
+	int		nfserr, count;
+
+	dprintk("nfsd: READDIR(3)  %s %d bytes at %d\n",
+				SVCFH_fmt(&argp->fh),
+				argp->count, (u32) argp->cookie);
+
+	/* Make sure we've room for the NULL ptr & eof flag, and shrink to
+	 * client read size */
+	count = (argp->count >> 2) - 2;
+
+	/* Read directory and encode entries on the fly */
+	fh_copy(&resp->fh, &argp->fh);
+
+	resp->buflen = count;
+	resp->common.err = nfs_ok;
+	resp->buffer = argp->buffer;
+	resp->rqstp = rqstp;
+	nfserr = nfsd_readdir(rqstp, &resp->fh, (loff_t*) &argp->cookie, 
+					&resp->common, nfs3svc_encode_entry);
+	memcpy(resp->verf, argp->verf, 8);
+	resp->count = resp->buffer - argp->buffer;
+	if (resp->offset)
+		xdr_encode_hyper(resp->offset, argp->cookie);
+
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Read a portion of a directory, including file handles and attrs.
+ * For now, we choose to ignore the dircount parameter.
+ */
+static int
+nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
+					       struct nfsd3_readdirres  *resp)
+{
+	int	nfserr, count = 0;
+	loff_t	offset;
+	int	i;
+	caddr_t	page_addr = NULL;
+
+	dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n",
+				SVCFH_fmt(&argp->fh),
+				argp->count, (u32) argp->cookie);
+
+	/* Convert byte count to number of words (i.e. >> 2),
+	 * and reserve room for the NULL ptr & eof flag (-2 words) */
+	resp->count = (argp->count >> 2) - 2;
+
+	/* Read directory and encode entries on the fly */
+	fh_copy(&resp->fh, &argp->fh);
+
+	resp->common.err = nfs_ok;
+	resp->buffer = argp->buffer;
+	resp->buflen = resp->count;
+	resp->rqstp = rqstp;
+	offset = argp->cookie;
+	nfserr = nfsd_readdir(rqstp, &resp->fh,
+				     &offset,
+				     &resp->common,
+				     nfs3svc_encode_entry_plus);
+	memcpy(resp->verf, argp->verf, 8);
+	for (i=1; i<rqstp->rq_resused ; i++) {
+		page_addr = page_address(rqstp->rq_respages[i]);
+
+		if (((caddr_t)resp->buffer >= page_addr) &&
+		    ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) {
+			count += (caddr_t)resp->buffer - page_addr;
+			break;
+		}
+		count += PAGE_SIZE;
+	}
+	resp->count = count >> 2;
+	if (resp->offset) {
+		if (unlikely(resp->offset1)) {
+			/* we ended up with offset on a page boundary */
+			*resp->offset = htonl(offset >> 32);
+			*resp->offset1 = htonl(offset & 0xffffffff);
+			resp->offset1 = NULL;
+		} else {
+			xdr_encode_hyper(resp->offset, offset);
+		}
+	}
+
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Get file system stats
+ */
+static int
+nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
+					   struct nfsd3_fsstatres *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: FSSTAT(3)   %s\n",
+				SVCFH_fmt(&argp->fh));
+
+	nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats);
+	fh_put(&argp->fh);
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Get file system info
+ */
+static int
+nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
+					   struct nfsd3_fsinfores *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: FSINFO(3)   %s\n",
+				SVCFH_fmt(&argp->fh));
+
+	resp->f_rtmax  = NFSSVC_MAXBLKSIZE;
+	resp->f_rtpref = NFSSVC_MAXBLKSIZE;
+	resp->f_rtmult = PAGE_SIZE;
+	resp->f_wtmax  = NFSSVC_MAXBLKSIZE;
+	resp->f_wtpref = NFSSVC_MAXBLKSIZE;
+	resp->f_wtmult = PAGE_SIZE;
+	resp->f_dtpref = PAGE_SIZE;
+	resp->f_maxfilesize = ~(u32) 0;
+	resp->f_properties = NFS3_FSF_DEFAULT;
+
+	nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP);
+
+	/* Check special features of the file system. May request
+	 * different read/write sizes for file systems known to have
+	 * problems with large blocks */
+	if (nfserr == 0) {
+		struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb;
+
+		/* Note that we don't care for remote fs's here */
+		if (sb->s_magic == 0x4d44 /* MSDOS_SUPER_MAGIC */) {
+			resp->f_properties = NFS3_FSF_BILLYBOY;
+		}
+		resp->f_maxfilesize = sb->s_maxbytes;
+	}
+
+	fh_put(&argp->fh);
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Get pathconf info for the specified file
+ */
+static int
+nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle      *argp,
+					     struct nfsd3_pathconfres *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: PATHCONF(3) %s\n",
+				SVCFH_fmt(&argp->fh));
+
+	/* Set default pathconf */
+	resp->p_link_max = 255;		/* at least */
+	resp->p_name_max = 255;		/* at least */
+	resp->p_no_trunc = 0;
+	resp->p_chown_restricted = 1;
+	resp->p_case_insensitive = 0;
+	resp->p_case_preserving = 1;
+
+	nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP);
+
+	if (nfserr == 0) {
+		struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb;
+
+		/* Note that we don't care for remote fs's here */
+		switch (sb->s_magic) {
+		case EXT2_SUPER_MAGIC:
+			resp->p_link_max = EXT2_LINK_MAX;
+			resp->p_name_max = EXT2_NAME_LEN;
+			break;
+		case 0x4d44:	/* MSDOS_SUPER_MAGIC */
+			resp->p_case_insensitive = 1;
+			resp->p_case_preserving  = 0;
+			break;
+		}
+	}
+
+	fh_put(&argp->fh);
+	RETURN_STATUS(nfserr);
+}
+
+
+/*
+ * Commit a file (range) to stable storage.
+ */
+static int
+nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp,
+					   struct nfsd3_commitres  *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: COMMIT(3)   %s %u@%Lu\n",
+				SVCFH_fmt(&argp->fh),
+				argp->count,
+				(unsigned long long) argp->offset);
+
+	if (argp->offset > NFS_OFFSET_MAX)
+		RETURN_STATUS(nfserr_inval);
+
+	fh_copy(&resp->fh, &argp->fh);
+	nfserr = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count);
+
+	RETURN_STATUS(nfserr);
+}
+
+
+/*
+ * NFSv3 Server procedures.
+ * Only the results of non-idempotent operations are cached.
+ */
+#define nfs3svc_decode_voidargs		NULL
+#define nfs3svc_release_void		NULL
+#define nfs3svc_decode_fhandleargs	nfs3svc_decode_fhandle
+#define nfs3svc_encode_attrstatres	nfs3svc_encode_attrstat
+#define nfs3svc_encode_wccstatres	nfs3svc_encode_wccstat
+#define nfsd3_mkdirargs			nfsd3_createargs
+#define nfsd3_readdirplusargs		nfsd3_readdirargs
+#define nfsd3_fhandleargs		nfsd_fhandle
+#define nfsd3_fhandleres		nfsd3_attrstat
+#define nfsd3_attrstatres		nfsd3_attrstat
+#define nfsd3_wccstatres		nfsd3_attrstat
+#define nfsd3_createres			nfsd3_diropres
+#define nfsd3_voidres			nfsd3_voidargs
+struct nfsd3_voidargs { int dummy; };
+
+#define PROC(name, argt, rest, relt, cache, respsize)	\
+ { (svc_procfunc) nfsd3_proc_##name,		\
+   (kxdrproc_t) nfs3svc_decode_##argt##args,	\
+   (kxdrproc_t) nfs3svc_encode_##rest##res,	\
+   (kxdrproc_t) nfs3svc_release_##relt,		\
+   sizeof(struct nfsd3_##argt##args),		\
+   sizeof(struct nfsd3_##rest##res),		\
+   0,						\
+   cache,					\
+   respsize,					\
+ }
+
+#define ST 1		/* status*/
+#define FH 17		/* filehandle with length */
+#define AT 21		/* attributes */
+#define pAT (1+AT)	/* post attributes - conditional */
+#define WC (7+pAT)	/* WCC attributes */
+
+static struct svc_procedure		nfsd_procedures3[22] = {
+  PROC(null,	 void,		void,		void,	  RC_NOCACHE, ST),
+  PROC(getattr,	 fhandle,	attrstat,	fhandle,  RC_NOCACHE, ST+AT),
+  PROC(setattr,  sattr,		wccstat,	fhandle,  RC_REPLBUFF, ST+WC),
+  PROC(lookup,	 dirop,		dirop,		fhandle2, RC_NOCACHE, ST+FH+pAT+pAT),
+  PROC(access,	 access,	access,		fhandle,  RC_NOCACHE, ST+pAT+1),
+  PROC(readlink, readlink,	readlink,	fhandle,  RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4),
+  PROC(read,	 read,		read,		fhandle,  RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE),
+  PROC(write,	 write,		write,		fhandle,  RC_REPLBUFF, ST+WC+4),
+  PROC(create,	 create,	create,		fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
+  PROC(mkdir,	 mkdir,		create,		fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
+  PROC(symlink,	 symlink,	create,		fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
+  PROC(mknod,	 mknod,		create,		fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
+  PROC(remove,	 dirop,		wccstat,	fhandle,  RC_REPLBUFF, ST+WC),
+  PROC(rmdir,	 dirop,		wccstat,	fhandle,  RC_REPLBUFF, ST+WC),
+  PROC(rename,	 rename,	rename,		fhandle2, RC_REPLBUFF, ST+WC+WC),
+  PROC(link,	 link,		link,		fhandle2, RC_REPLBUFF, ST+pAT+WC),
+  PROC(readdir,	 readdir,	readdir,	fhandle,  RC_NOCACHE, 0),
+  PROC(readdirplus,readdirplus,	readdir,	fhandle,  RC_NOCACHE, 0),
+  PROC(fsstat,	 fhandle,	fsstat,		void,     RC_NOCACHE, ST+pAT+2*6+1),
+  PROC(fsinfo,   fhandle,	fsinfo,		void,     RC_NOCACHE, ST+pAT+12),
+  PROC(pathconf, fhandle,	pathconf,	void,     RC_NOCACHE, ST+pAT+6),
+  PROC(commit,	 commit,	commit,		fhandle,  RC_NOCACHE, ST+WC+2),
+};
+
+struct svc_version	nfsd_version3 = {
+		.vs_vers	= 3,
+		.vs_nproc	= 22,
+		.vs_proc	= nfsd_procedures3,
+		.vs_dispatch	= nfsd_dispatch,
+		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+};
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
new file mode 100644
index 000000000000..11f806835c5a
--- /dev/null
+++ b/fs/nfsd/nfs3xdr.c
@@ -0,0 +1,1092 @@
+/*
+ * linux/fs/nfsd/nfs3xdr.c
+ *
+ * XDR support for nfsd/protocol version 3.
+ *
+ * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
+ *
+ * 2003-08-09 Jamie Lokier: Use htonl() for nanoseconds, not htons()!
+ */
+
+#include <linux/types.h>
+#include <linux/time.h>
+#include <linux/nfs3.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/dcache.h>
+#include <linux/namei.h>
+#include <linux/mm.h>
+#include <linux/vfs.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/xdr3.h>
+
+#define NFSDDBG_FACILITY		NFSDDBG_XDR
+
+#ifdef NFSD_OPTIMIZE_SPACE
+# define inline
+#endif
+
+
+/*
+ * Mapping of S_IF* types to NFS file types
+ */
+static u32	nfs3_ftypes[] = {
+	NF3NON,  NF3FIFO, NF3CHR, NF3BAD,
+	NF3DIR,  NF3BAD,  NF3BLK, NF3BAD,
+	NF3REG,  NF3BAD,  NF3LNK, NF3BAD,
+	NF3SOCK, NF3BAD,  NF3LNK, NF3BAD,
+};
+
+/*
+ * XDR functions for basic NFS types
+ */
+static inline u32 *
+encode_time3(u32 *p, struct timespec *time)
+{
+	*p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec);
+	return p;
+}
+
+static inline u32 *
+decode_time3(u32 *p, struct timespec *time)
+{
+	time->tv_sec = ntohl(*p++);
+	time->tv_nsec = ntohl(*p++);
+	return p;
+}
+
+static inline u32 *
+decode_fh(u32 *p, struct svc_fh *fhp)
+{
+	unsigned int size;
+	fh_init(fhp, NFS3_FHSIZE);
+	size = ntohl(*p++);
+	if (size > NFS3_FHSIZE)
+		return NULL;
+
+	memcpy(&fhp->fh_handle.fh_base, p, size);
+	fhp->fh_handle.fh_size = size;
+	return p + XDR_QUADLEN(size);
+}
+
+static inline u32 *
+encode_fh(u32 *p, struct svc_fh *fhp)
+{
+	unsigned int size = fhp->fh_handle.fh_size;
+	*p++ = htonl(size);
+	if (size) p[XDR_QUADLEN(size)-1]=0;
+	memcpy(p, &fhp->fh_handle.fh_base, size);
+	return p + XDR_QUADLEN(size);
+}
+
+/*
+ * Decode a file name and make sure that the path contains
+ * no slashes or null bytes.
+ */
+static inline u32 *
+decode_filename(u32 *p, char **namp, int *lenp)
+{
+	char		*name;
+	int		i;
+
+	if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS3_MAXNAMLEN)) != NULL) {
+		for (i = 0, name = *namp; i < *lenp; i++, name++) {
+			if (*name == '\0' || *name == '/')
+				return NULL;
+		}
+	}
+
+	return p;
+}
+
+static inline u32 *
+decode_sattr3(u32 *p, struct iattr *iap)
+{
+	u32	tmp;
+
+	iap->ia_valid = 0;
+
+	if (*p++) {
+		iap->ia_valid |= ATTR_MODE;
+		iap->ia_mode = ntohl(*p++);
+	}
+	if (*p++) {
+		iap->ia_valid |= ATTR_UID;
+		iap->ia_uid = ntohl(*p++);
+	}
+	if (*p++) {
+		iap->ia_valid |= ATTR_GID;
+		iap->ia_gid = ntohl(*p++);
+	}
+	if (*p++) {
+		u64	newsize;
+
+		iap->ia_valid |= ATTR_SIZE;
+		p = xdr_decode_hyper(p, &newsize);
+		if (newsize <= NFS_OFFSET_MAX)
+			iap->ia_size = newsize;
+		else
+			iap->ia_size = NFS_OFFSET_MAX;
+	}
+	if ((tmp = ntohl(*p++)) == 1) {	/* set to server time */
+		iap->ia_valid |= ATTR_ATIME;
+	} else if (tmp == 2) {		/* set to client time */
+		iap->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
+		iap->ia_atime.tv_sec = ntohl(*p++);
+		iap->ia_atime.tv_nsec = ntohl(*p++);
+	}
+	if ((tmp = ntohl(*p++)) == 1) {	/* set to server time */
+		iap->ia_valid |= ATTR_MTIME;
+	} else if (tmp == 2) {		/* set to client time */
+		iap->ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
+		iap->ia_mtime.tv_sec = ntohl(*p++);
+		iap->ia_mtime.tv_nsec = ntohl(*p++);
+	}
+	return p;
+}
+
+static inline u32 *
+encode_fattr3(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+{
+	struct vfsmount *mnt = fhp->fh_export->ex_mnt;
+	struct dentry	*dentry = fhp->fh_dentry;
+	struct kstat stat;
+	struct timespec time;
+
+	vfs_getattr(mnt, dentry, &stat);
+
+	*p++ = htonl(nfs3_ftypes[(stat.mode & S_IFMT) >> 12]);
+	*p++ = htonl((u32) stat.mode);
+	*p++ = htonl((u32) stat.nlink);
+	*p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid));
+	*p++ = htonl((u32) nfsd_rgid(rqstp, stat.gid));
+	if (S_ISLNK(stat.mode) && stat.size > NFS3_MAXPATHLEN) {
+		p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
+	} else {
+		p = xdr_encode_hyper(p, (u64) stat.size);
+	}
+	p = xdr_encode_hyper(p, ((u64)stat.blocks) << 9);
+	*p++ = htonl((u32) MAJOR(stat.rdev));
+	*p++ = htonl((u32) MINOR(stat.rdev));
+	if (is_fsid(fhp, rqstp->rq_reffh))
+		p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid);
+	else
+		p = xdr_encode_hyper(p, (u64) huge_encode_dev(stat.dev));
+	p = xdr_encode_hyper(p, (u64) stat.ino);
+	p = encode_time3(p, &stat.atime);
+	lease_get_mtime(dentry->d_inode, &time); 
+	p = encode_time3(p, &time);
+	p = encode_time3(p, &stat.ctime);
+
+	return p;
+}
+
+static inline u32 *
+encode_saved_post_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+{
+	struct inode	*inode = fhp->fh_dentry->d_inode;
+
+	/* Attributes to follow */
+	*p++ = xdr_one;
+
+	*p++ = htonl(nfs3_ftypes[(fhp->fh_post_mode & S_IFMT) >> 12]);
+	*p++ = htonl((u32) fhp->fh_post_mode);
+	*p++ = htonl((u32) fhp->fh_post_nlink);
+	*p++ = htonl((u32) nfsd_ruid(rqstp, fhp->fh_post_uid));
+	*p++ = htonl((u32) nfsd_rgid(rqstp, fhp->fh_post_gid));
+	if (S_ISLNK(fhp->fh_post_mode) && fhp->fh_post_size > NFS3_MAXPATHLEN) {
+		p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
+	} else {
+		p = xdr_encode_hyper(p, (u64) fhp->fh_post_size);
+	}
+	p = xdr_encode_hyper(p, ((u64)fhp->fh_post_blocks) << 9);
+	*p++ = fhp->fh_post_rdev[0];
+	*p++ = fhp->fh_post_rdev[1];
+	if (is_fsid(fhp, rqstp->rq_reffh))
+		p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid);
+	else
+		p = xdr_encode_hyper(p, (u64)huge_encode_dev(inode->i_sb->s_dev));
+	p = xdr_encode_hyper(p, (u64) inode->i_ino);
+	p = encode_time3(p, &fhp->fh_post_atime);
+	p = encode_time3(p, &fhp->fh_post_mtime);
+	p = encode_time3(p, &fhp->fh_post_ctime);
+
+	return p;
+}
+
+/*
+ * Encode post-operation attributes.
+ * The inode may be NULL if the call failed because of a stale file
+ * handle. In this case, no attributes are returned.
+ */
+static u32 *
+encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+{
+	struct dentry *dentry = fhp->fh_dentry;
+	if (dentry && dentry->d_inode != NULL) {
+		*p++ = xdr_one;		/* attributes follow */
+		return encode_fattr3(rqstp, p, fhp);
+	}
+	*p++ = xdr_zero;
+	return p;
+}
+
+/*
+ * Enocde weak cache consistency data
+ */
+static u32 *
+encode_wcc_data(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+{
+	struct dentry	*dentry = fhp->fh_dentry;
+
+	if (dentry && dentry->d_inode && fhp->fh_post_saved) {
+		if (fhp->fh_pre_saved) {
+			*p++ = xdr_one;
+			p = xdr_encode_hyper(p, (u64) fhp->fh_pre_size);
+			p = encode_time3(p, &fhp->fh_pre_mtime);
+			p = encode_time3(p, &fhp->fh_pre_ctime);
+		} else {
+			*p++ = xdr_zero;
+		}
+		return encode_saved_post_attr(rqstp, p, fhp);
+	}
+	/* no pre- or post-attrs */
+	*p++ = xdr_zero;
+	return encode_post_op_attr(rqstp, p, fhp);
+}
+
+
+/*
+ * XDR decode functions
+ */
+int
+nfs3svc_decode_fhandle(struct svc_rqst *rqstp, u32 *p, struct nfsd_fhandle *args)
+{
+	if (!(p = decode_fh(p, &args->fh)))
+		return 0;
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_sattrargs *args)
+{
+	if (!(p = decode_fh(p, &args->fh))
+	 || !(p = decode_sattr3(p, &args->attrs)))
+		return 0;
+
+	if ((args->check_guard = ntohl(*p++)) != 0) { 
+		struct timespec time; 
+		p = decode_time3(p, &time);
+		args->guardtime = time.tv_sec;
+	}
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_diropargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_diropargs *args)
+{
+	if (!(p = decode_fh(p, &args->fh))
+	 || !(p = decode_filename(p, &args->name, &args->len)))
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_accessargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_accessargs *args)
+{
+	if (!(p = decode_fh(p, &args->fh)))
+		return 0;
+	args->access = ntohl(*p++);
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_readargs *args)
+{
+	unsigned int len;
+	int v,pn;
+
+	if (!(p = decode_fh(p, &args->fh))
+	 || !(p = xdr_decode_hyper(p, &args->offset)))
+		return 0;
+
+	len = args->count = ntohl(*p++);
+
+	if (len > NFSSVC_MAXBLKSIZE)
+		len = NFSSVC_MAXBLKSIZE;
+
+	/* set up the kvec */
+	v=0;
+	while (len > 0) {
+		pn = rqstp->rq_resused;
+		svc_take_page(rqstp);
+		args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+		args->vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE;
+		len -= args->vec[v].iov_len;
+		v++;
+	}
+	args->vlen = v;
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_writeargs *args)
+{
+	unsigned int len, v, hdr;
+
+	if (!(p = decode_fh(p, &args->fh))
+	 || !(p = xdr_decode_hyper(p, &args->offset)))
+		return 0;
+
+	args->count = ntohl(*p++);
+	args->stable = ntohl(*p++);
+	len = args->len = ntohl(*p++);
+
+	hdr = (void*)p - rqstp->rq_arg.head[0].iov_base;
+	if (rqstp->rq_arg.len < len + hdr)
+		return 0;
+
+	args->vec[0].iov_base = (void*)p;
+	args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
+
+	if (len > NFSSVC_MAXBLKSIZE)
+		len = NFSSVC_MAXBLKSIZE;
+	v=  0;
+	while (len > args->vec[v].iov_len) {
+		len -= args->vec[v].iov_len;
+		v++;
+		args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]);
+		args->vec[v].iov_len = PAGE_SIZE;
+	}
+	args->vec[v].iov_len = len;
+	args->vlen = v+1;
+
+	return args->count == args->len && args->vec[0].iov_len > 0;
+}
+
+int
+nfs3svc_decode_createargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_createargs *args)
+{
+	if (!(p = decode_fh(p, &args->fh))
+	 || !(p = decode_filename(p, &args->name, &args->len)))
+		return 0;
+
+	switch (args->createmode = ntohl(*p++)) {
+	case NFS3_CREATE_UNCHECKED:
+	case NFS3_CREATE_GUARDED:
+		if (!(p = decode_sattr3(p, &args->attrs)))
+			return 0;
+		break;
+	case NFS3_CREATE_EXCLUSIVE:
+		args->verf = p;
+		p += 2;
+		break;
+	default:
+		return 0;
+	}
+
+	return xdr_argsize_check(rqstp, p);
+}
+int
+nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_createargs *args)
+{
+	if (!(p = decode_fh(p, &args->fh))
+	 || !(p = decode_filename(p, &args->name, &args->len))
+	 || !(p = decode_sattr3(p, &args->attrs)))
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_symlinkargs *args)
+{
+	unsigned int len;
+	int avail;
+	char *old, *new;
+	struct kvec *vec;
+
+	if (!(p = decode_fh(p, &args->ffh))
+	 || !(p = decode_filename(p, &args->fname, &args->flen))
+	 || !(p = decode_sattr3(p, &args->attrs))
+		)
+		return 0;
+	/* now decode the pathname, which might be larger than the first page.
+	 * As we have to check for nul's anyway, we copy it into a new page
+	 * This page appears in the rq_res.pages list, but as pages_len is always
+	 * 0, it won't get in the way
+	 */
+	svc_take_page(rqstp);
+	len = ntohl(*p++);
+	if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE)
+		return 0;
+	args->tname = new = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+	args->tlen = len;
+	/* first copy and check from the first page */
+	old = (char*)p;
+	vec = &rqstp->rq_arg.head[0];
+	avail = vec->iov_len - (old - (char*)vec->iov_base);
+	while (len && avail && *old) {
+		*new++ = *old++;
+		len--;
+		avail--;
+	}
+	/* now copy next page if there is one */
+	if (len && !avail && rqstp->rq_arg.page_len) {
+		avail = rqstp->rq_arg.page_len;
+		if (avail > PAGE_SIZE) avail = PAGE_SIZE;
+		old = page_address(rqstp->rq_arg.pages[0]);
+	}
+	while (len && avail && *old) {
+		*new++ = *old++;
+		len--;
+		avail--;
+	}
+	*new = '\0';
+	if (len)
+		return 0;
+
+	return 1;
+}
+
+int
+nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_mknodargs *args)
+{
+	if (!(p = decode_fh(p, &args->fh))
+	 || !(p = decode_filename(p, &args->name, &args->len)))
+		return 0;
+
+	args->ftype = ntohl(*p++);
+
+	if (args->ftype == NF3BLK  || args->ftype == NF3CHR
+	 || args->ftype == NF3SOCK || args->ftype == NF3FIFO) {
+		if (!(p = decode_sattr3(p, &args->attrs)))
+			return 0;
+	}
+
+	if (args->ftype == NF3BLK || args->ftype == NF3CHR) {
+		args->major = ntohl(*p++);
+		args->minor = ntohl(*p++);
+	}
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_renameargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_renameargs *args)
+{
+	if (!(p = decode_fh(p, &args->ffh))
+	 || !(p = decode_filename(p, &args->fname, &args->flen))
+	 || !(p = decode_fh(p, &args->tfh))
+	 || !(p = decode_filename(p, &args->tname, &args->tlen)))
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_readlinkargs *args)
+{
+	if (!(p = decode_fh(p, &args->fh)))
+		return 0;
+	svc_take_page(rqstp);
+	args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_linkargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_linkargs *args)
+{
+	if (!(p = decode_fh(p, &args->ffh))
+	 || !(p = decode_fh(p, &args->tfh))
+	 || !(p = decode_filename(p, &args->tname, &args->tlen)))
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_readdirargs *args)
+{
+	if (!(p = decode_fh(p, &args->fh)))
+		return 0;
+	p = xdr_decode_hyper(p, &args->cookie);
+	args->verf   = p; p += 2;
+	args->dircount = ~0;
+	args->count  = ntohl(*p++);
+
+	if (args->count > PAGE_SIZE)
+		args->count = PAGE_SIZE;
+
+	svc_take_page(rqstp);
+	args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_readdirargs *args)
+{
+	int len, pn;
+
+	if (!(p = decode_fh(p, &args->fh)))
+		return 0;
+	p = xdr_decode_hyper(p, &args->cookie);
+	args->verf     = p; p += 2;
+	args->dircount = ntohl(*p++);
+	args->count    = ntohl(*p++);
+
+	len = (args->count > NFSSVC_MAXBLKSIZE) ? NFSSVC_MAXBLKSIZE :
+						  args->count;
+	args->count = len;
+
+	while (len > 0) {
+		pn = rqstp->rq_resused;
+		svc_take_page(rqstp);
+		if (!args->buffer)
+			args->buffer = page_address(rqstp->rq_respages[pn]);
+		len -= PAGE_SIZE;
+	}
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_commitargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_commitargs *args)
+{
+	if (!(p = decode_fh(p, &args->fh)))
+		return 0;
+	p = xdr_decode_hyper(p, &args->offset);
+	args->count = ntohl(*p++);
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * XDR encode functions
+ */
+/*
+ * There must be an encoding function for void results so svc_process
+ * will work properly.
+ */
+int
+nfs3svc_encode_voidres(struct svc_rqst *rqstp, u32 *p, void *dummy)
+{
+	return xdr_ressize_check(rqstp, p);
+}
+
+/* GETATTR */
+int
+nfs3svc_encode_attrstat(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_attrstat *resp)
+{
+	if (resp->status == 0)
+		p = encode_fattr3(rqstp, p, &resp->fh);
+	return xdr_ressize_check(rqstp, p);
+}
+
+/* SETATTR, REMOVE, RMDIR */
+int
+nfs3svc_encode_wccstat(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_attrstat *resp)
+{
+	p = encode_wcc_data(rqstp, p, &resp->fh);
+	return xdr_ressize_check(rqstp, p);
+}
+
+/* LOOKUP */
+int
+nfs3svc_encode_diropres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_diropres *resp)
+{
+	if (resp->status == 0) {
+		p = encode_fh(p, &resp->fh);
+		p = encode_post_op_attr(rqstp, p, &resp->fh);
+	}
+	p = encode_post_op_attr(rqstp, p, &resp->dirfh);
+	return xdr_ressize_check(rqstp, p);
+}
+
+/* ACCESS */
+int
+nfs3svc_encode_accessres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_accessres *resp)
+{
+	p = encode_post_op_attr(rqstp, p, &resp->fh);
+	if (resp->status == 0)
+		*p++ = htonl(resp->access);
+	return xdr_ressize_check(rqstp, p);
+}
+
+/* READLINK */
+int
+nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_readlinkres *resp)
+{
+	p = encode_post_op_attr(rqstp, p, &resp->fh);
+	if (resp->status == 0) {
+		*p++ = htonl(resp->len);
+		xdr_ressize_check(rqstp, p);
+		rqstp->rq_res.page_len = resp->len;
+		if (resp->len & 3) {
+			/* need to pad the tail */
+			rqstp->rq_restailpage = 0;
+			rqstp->rq_res.tail[0].iov_base = p;
+			*p = 0;
+			rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
+		}
+		return 1;
+	} else
+		return xdr_ressize_check(rqstp, p);
+}
+
+/* READ */
+int
+nfs3svc_encode_readres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_readres *resp)
+{
+	p = encode_post_op_attr(rqstp, p, &resp->fh);
+	if (resp->status == 0) {
+		*p++ = htonl(resp->count);
+		*p++ = htonl(resp->eof);
+		*p++ = htonl(resp->count);	/* xdr opaque count */
+		xdr_ressize_check(rqstp, p);
+		/* now update rqstp->rq_res to reflect data aswell */
+		rqstp->rq_res.page_len = resp->count;
+		if (resp->count & 3) {
+			/* need to pad the tail */
+			rqstp->rq_restailpage = 0;
+			rqstp->rq_res.tail[0].iov_base = p;
+			*p = 0;
+			rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3);
+		}
+		return 1;
+	} else
+		return xdr_ressize_check(rqstp, p);
+}
+
+/* WRITE */
+int
+nfs3svc_encode_writeres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_writeres *resp)
+{
+	p = encode_wcc_data(rqstp, p, &resp->fh);
+	if (resp->status == 0) {
+		*p++ = htonl(resp->count);
+		*p++ = htonl(resp->committed);
+		*p++ = htonl(nfssvc_boot.tv_sec);
+		*p++ = htonl(nfssvc_boot.tv_usec);
+	}
+	return xdr_ressize_check(rqstp, p);
+}
+
+/* CREATE, MKDIR, SYMLINK, MKNOD */
+int
+nfs3svc_encode_createres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_diropres *resp)
+{
+	if (resp->status == 0) {
+		*p++ = xdr_one;
+		p = encode_fh(p, &resp->fh);
+		p = encode_post_op_attr(rqstp, p, &resp->fh);
+	}
+	p = encode_wcc_data(rqstp, p, &resp->dirfh);
+	return xdr_ressize_check(rqstp, p);
+}
+
+/* RENAME */
+int
+nfs3svc_encode_renameres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_renameres *resp)
+{
+	p = encode_wcc_data(rqstp, p, &resp->ffh);
+	p = encode_wcc_data(rqstp, p, &resp->tfh);
+	return xdr_ressize_check(rqstp, p);
+}
+
+/* LINK */
+int
+nfs3svc_encode_linkres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_linkres *resp)
+{
+	p = encode_post_op_attr(rqstp, p, &resp->fh);
+	p = encode_wcc_data(rqstp, p, &resp->tfh);
+	return xdr_ressize_check(rqstp, p);
+}
+
+/* READDIR */
+int
+nfs3svc_encode_readdirres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_readdirres *resp)
+{
+	p = encode_post_op_attr(rqstp, p, &resp->fh);
+
+	if (resp->status == 0) {
+		/* stupid readdir cookie */
+		memcpy(p, resp->verf, 8); p += 2;
+		xdr_ressize_check(rqstp, p);
+		if (rqstp->rq_res.head[0].iov_len + (2<<2) > PAGE_SIZE)
+			return 1; /*No room for trailer */
+		rqstp->rq_res.page_len = (resp->count) << 2;
+
+		/* add the 'tail' to the end of the 'head' page - page 0. */
+		rqstp->rq_restailpage = 0;
+		rqstp->rq_res.tail[0].iov_base = p;
+		*p++ = 0;		/* no more entries */
+		*p++ = htonl(resp->common.err == nfserr_eof);
+		rqstp->rq_res.tail[0].iov_len = 2<<2;
+		return 1;
+	} else
+		return xdr_ressize_check(rqstp, p);
+}
+
+static inline u32 *
+encode_entry_baggage(struct nfsd3_readdirres *cd, u32 *p, const char *name,
+	     int namlen, ino_t ino)
+{
+	*p++ = xdr_one;				 /* mark entry present */
+	p    = xdr_encode_hyper(p, ino);	 /* file id */
+	p    = xdr_encode_array(p, name, namlen);/* name length & name */
+
+	cd->offset = p;				/* remember pointer */
+	p = xdr_encode_hyper(p, NFS_OFFSET_MAX);/* offset of next entry */
+
+	return p;
+}
+
+static inline u32 *
+encode_entryplus_baggage(struct nfsd3_readdirres *cd, u32 *p,
+		struct svc_fh *fhp)
+{
+		p = encode_post_op_attr(cd->rqstp, p, fhp);
+		*p++ = xdr_one;			/* yes, a file handle follows */
+		p = encode_fh(p, fhp);
+		fh_put(fhp);
+		return p;
+}
+
+static int
+compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
+		const char *name, int namlen)
+{
+	struct svc_export	*exp;
+	struct dentry		*dparent, *dchild;
+	int rv = 0;
+
+	dparent = cd->fh.fh_dentry;
+	exp  = cd->fh.fh_export;
+
+	fh_init(fhp, NFS3_FHSIZE);
+	if (isdotent(name, namlen)) {
+		if (namlen == 2) {
+			dchild = dget_parent(dparent);
+			if (dchild == dparent) {
+				/* filesystem root - cannot return filehandle for ".." */
+				dput(dchild);
+				return 1;
+			}
+		} else
+			dchild = dget(dparent);
+	} else
+		dchild = lookup_one_len(name, dparent, namlen);
+	if (IS_ERR(dchild))
+		return 1;
+	if (d_mountpoint(dchild) ||
+	    fh_compose(fhp, exp, dchild, &cd->fh) != 0 ||
+	    !dchild->d_inode)
+		rv = 1;
+	dput(dchild);
+	return rv;
+}
+
+/*
+ * Encode a directory entry. This one works for both normal readdir
+ * and readdirplus.
+ * The normal readdir reply requires 2 (fileid) + 1 (stringlen)
+ * + string + 2 (cookie) + 1 (next) words, i.e. 6 + strlen.
+ * 
+ * The readdirplus baggage is 1+21 words for post_op_attr, plus the
+ * file handle.
+ */
+
+#define NFS3_ENTRY_BAGGAGE	(2 + 1 + 2 + 1)
+#define NFS3_ENTRYPLUS_BAGGAGE	(1 + 21 + 1 + (NFS3_FHSIZE >> 2))
+static int
+encode_entry(struct readdir_cd *ccd, const char *name,
+	     int namlen, off_t offset, ino_t ino, unsigned int d_type, int plus)
+{
+	struct nfsd3_readdirres *cd = container_of(ccd, struct nfsd3_readdirres,
+		       					common);
+	u32		*p = cd->buffer;
+	caddr_t		curr_page_addr = NULL;
+	int		pn;		/* current page number */
+	int		slen;		/* string (name) length */
+	int		elen;		/* estimated entry length in words */
+	int		num_entry_words = 0;	/* actual number of words */
+
+	if (cd->offset) {
+		u64 offset64 = offset;
+
+		if (unlikely(cd->offset1)) {
+			/* we ended up with offset on a page boundary */
+			*cd->offset = htonl(offset64 >> 32);
+			*cd->offset1 = htonl(offset64 & 0xffffffff);
+			cd->offset1 = NULL;
+		} else {
+			xdr_encode_hyper(cd->offset, (u64) offset);
+		}
+	}
+
+	/*
+	dprintk("encode_entry(%.*s @%ld%s)\n",
+		namlen, name, (long) offset, plus? " plus" : "");
+	 */
+
+	/* truncate filename if too long */
+	if (namlen > NFS3_MAXNAMLEN)
+		namlen = NFS3_MAXNAMLEN;
+
+	slen = XDR_QUADLEN(namlen);
+	elen = slen + NFS3_ENTRY_BAGGAGE
+		+ (plus? NFS3_ENTRYPLUS_BAGGAGE : 0);
+
+	if (cd->buflen < elen) {
+		cd->common.err = nfserr_toosmall;
+		return -EINVAL;
+	}
+
+	/* determine which page in rq_respages[] we are currently filling */
+	for (pn=1; pn < cd->rqstp->rq_resused; pn++) {
+		curr_page_addr = page_address(cd->rqstp->rq_respages[pn]);
+
+		if (((caddr_t)cd->buffer >= curr_page_addr) &&
+		    ((caddr_t)cd->buffer <  curr_page_addr + PAGE_SIZE))
+			break;
+	}
+
+	if ((caddr_t)(cd->buffer + elen) < (curr_page_addr + PAGE_SIZE)) {
+		/* encode entry in current page */
+
+		p = encode_entry_baggage(cd, p, name, namlen, ino);
+
+		/* throw in readdirplus baggage */
+		if (plus) {
+			struct svc_fh	fh;
+
+			if (compose_entry_fh(cd, &fh, name, namlen) > 0) {
+				*p++ = 0;
+				*p++ = 0;
+			} else
+				p = encode_entryplus_baggage(cd, p, &fh);
+		}
+		num_entry_words = p - cd->buffer;
+	} else if (cd->rqstp->rq_respages[pn+1] != NULL) {
+		/* temporarily encode entry into next page, then move back to
+		 * current and next page in rq_respages[] */
+		u32 *p1, *tmp;
+		int len1, len2;
+
+		/* grab next page for temporary storage of entry */
+		p1 = tmp = page_address(cd->rqstp->rq_respages[pn+1]);
+
+		p1 = encode_entry_baggage(cd, p1, name, namlen, ino);
+
+		/* throw in readdirplus baggage */
+		if (plus) {
+			struct svc_fh	fh;
+
+			if (compose_entry_fh(cd, &fh, name, namlen) > 0) {
+				/* zero out the filehandle */
+				*p1++ = 0;
+				*p1++ = 0;
+			} else
+				p1 = encode_entryplus_baggage(cd, p1, &fh);
+		}
+
+		/* determine entry word length and lengths to go in pages */
+		num_entry_words = p1 - tmp;
+		len1 = curr_page_addr + PAGE_SIZE - (caddr_t)cd->buffer;
+		if ((num_entry_words << 2) < len1) {
+			/* the actual number of words in the entry is less
+			 * than elen and can still fit in the current page
+			 */
+			memmove(p, tmp, num_entry_words << 2);
+			p += num_entry_words;
+
+			/* update offset */
+			cd->offset = cd->buffer + (cd->offset - tmp);
+		} else {
+			unsigned int offset_r = (cd->offset - tmp) << 2;
+
+			/* update pointer to offset location.
+			 * This is a 64bit quantity, so we need to
+			 * deal with 3 cases:
+			 *  -	entirely in first page
+			 *  -	entirely in second page
+			 *  -	4 bytes in each page
+			 */
+			if (offset_r + 8 <= len1) {
+				cd->offset = p + (cd->offset - tmp);
+			} else if (offset_r >= len1) {
+				cd->offset -= len1 >> 2;
+			} else {
+				/* sitting on the fence */
+				BUG_ON(offset_r != len1 - 4);
+				cd->offset = p + (cd->offset - tmp);
+				cd->offset1 = tmp;
+			}
+
+			len2 = (num_entry_words << 2) - len1;
+
+			/* move from temp page to current and next pages */
+			memmove(p, tmp, len1);
+			memmove(tmp, (caddr_t)tmp+len1, len2);
+
+			p = tmp + (len2 >> 2);
+		}
+	}
+	else {
+		cd->common.err = nfserr_toosmall;
+		return -EINVAL;
+	}
+
+	cd->buflen -= num_entry_words;
+	cd->buffer = p;
+	cd->common.err = nfs_ok;
+	return 0;
+
+}
+
+int
+nfs3svc_encode_entry(struct readdir_cd *cd, const char *name,
+		     int namlen, loff_t offset, ino_t ino, unsigned int d_type)
+{
+	return encode_entry(cd, name, namlen, offset, ino, d_type, 0);
+}
+
+int
+nfs3svc_encode_entry_plus(struct readdir_cd *cd, const char *name,
+			  int namlen, loff_t offset, ino_t ino, unsigned int d_type)
+{
+	return encode_entry(cd, name, namlen, offset, ino, d_type, 1);
+}
+
+/* FSSTAT */
+int
+nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_fsstatres *resp)
+{
+	struct kstatfs	*s = &resp->stats;
+	u64		bs = s->f_bsize;
+
+	*p++ = xdr_zero;	/* no post_op_attr */
+
+	if (resp->status == 0) {
+		p = xdr_encode_hyper(p, bs * s->f_blocks);	/* total bytes */
+		p = xdr_encode_hyper(p, bs * s->f_bfree);	/* free bytes */
+		p = xdr_encode_hyper(p, bs * s->f_bavail);	/* user available bytes */
+		p = xdr_encode_hyper(p, s->f_files);	/* total inodes */
+		p = xdr_encode_hyper(p, s->f_ffree);	/* free inodes */
+		p = xdr_encode_hyper(p, s->f_ffree);	/* user available inodes */
+		*p++ = htonl(resp->invarsec);	/* mean unchanged time */
+	}
+	return xdr_ressize_check(rqstp, p);
+}
+
+/* FSINFO */
+int
+nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_fsinfores *resp)
+{
+	*p++ = xdr_zero;	/* no post_op_attr */
+
+	if (resp->status == 0) {
+		*p++ = htonl(resp->f_rtmax);
+		*p++ = htonl(resp->f_rtpref);
+		*p++ = htonl(resp->f_rtmult);
+		*p++ = htonl(resp->f_wtmax);
+		*p++ = htonl(resp->f_wtpref);
+		*p++ = htonl(resp->f_wtmult);
+		*p++ = htonl(resp->f_dtpref);
+		p = xdr_encode_hyper(p, resp->f_maxfilesize);
+		*p++ = xdr_one;
+		*p++ = xdr_zero;
+		*p++ = htonl(resp->f_properties);
+	}
+
+	return xdr_ressize_check(rqstp, p);
+}
+
+/* PATHCONF */
+int
+nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_pathconfres *resp)
+{
+	*p++ = xdr_zero;	/* no post_op_attr */
+
+	if (resp->status == 0) {
+		*p++ = htonl(resp->p_link_max);
+		*p++ = htonl(resp->p_name_max);
+		*p++ = htonl(resp->p_no_trunc);
+		*p++ = htonl(resp->p_chown_restricted);
+		*p++ = htonl(resp->p_case_insensitive);
+		*p++ = htonl(resp->p_case_preserving);
+	}
+
+	return xdr_ressize_check(rqstp, p);
+}
+
+/* COMMIT */
+int
+nfs3svc_encode_commitres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_commitres *resp)
+{
+	p = encode_wcc_data(rqstp, p, &resp->fh);
+	/* Write verifier */
+	if (resp->status == 0) {
+		*p++ = htonl(nfssvc_boot.tv_sec);
+		*p++ = htonl(nfssvc_boot.tv_usec);
+	}
+	return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * XDR release functions
+ */
+int
+nfs3svc_release_fhandle(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_attrstat *resp)
+{
+	fh_put(&resp->fh);
+	return 1;
+}
+
+int
+nfs3svc_release_fhandle2(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_fhandle_pair *resp)
+{
+	fh_put(&resp->fh1);
+	fh_put(&resp->fh2);
+	return 1;
+}
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
new file mode 100644
index 000000000000..11ebf6c4aa54
--- /dev/null
+++ b/fs/nfsd/nfs4acl.c
@@ -0,0 +1,954 @@
+/*
+ *  fs/nfs4acl/acl.c
+ *
+ *  Common NFSv4 ACL handling code.
+ *
+ *  Copyright (c) 2002, 2003 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Marius Aamodt Eriksen <marius@umich.edu>
+ *  Jeff Sedlak <jsedlak@umich.edu>
+ *  J. Bruce Fields <bfields@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/nfs_fs.h>
+#include <linux/posix_acl.h>
+#include <linux/nfs4.h>
+#include <linux/nfs4_acl.h>
+
+
+/* mode bit translations: */
+#define NFS4_READ_MODE (NFS4_ACE_READ_DATA)
+#define NFS4_WRITE_MODE (NFS4_ACE_WRITE_DATA | NFS4_ACE_APPEND_DATA)
+#define NFS4_EXECUTE_MODE NFS4_ACE_EXECUTE
+#define NFS4_ANYONE_MODE (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL | NFS4_ACE_SYNCHRONIZE)
+#define NFS4_OWNER_MODE (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL)
+
+/* We don't support these bits; insist they be neither allowed nor denied */
+#define NFS4_MASK_UNSUPP (NFS4_ACE_DELETE | NFS4_ACE_WRITE_OWNER \
+		| NFS4_ACE_READ_NAMED_ATTRS | NFS4_ACE_WRITE_NAMED_ATTRS)
+
+/* flags used to simulate posix default ACLs */
+#define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \
+		| NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE)
+
+#define MASK_EQUAL(mask1, mask2) \
+	( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) )
+
+static u32
+mask_from_posix(unsigned short perm, unsigned int flags)
+{
+	int mask = NFS4_ANYONE_MODE;
+
+	if (flags & NFS4_ACL_OWNER)
+		mask |= NFS4_OWNER_MODE;
+	if (perm & ACL_READ)
+		mask |= NFS4_READ_MODE;
+	if (perm & ACL_WRITE)
+		mask |= NFS4_WRITE_MODE;
+	if ((perm & ACL_WRITE) && (flags & NFS4_ACL_DIR))
+		mask |= NFS4_ACE_DELETE_CHILD;
+	if (perm & ACL_EXECUTE)
+		mask |= NFS4_EXECUTE_MODE;
+	return mask;
+}
+
+static u32
+deny_mask(u32 allow_mask, unsigned int flags)
+{
+	u32 ret = ~allow_mask & ~NFS4_MASK_UNSUPP;
+	if (!(flags & NFS4_ACL_DIR))
+		ret &= ~NFS4_ACE_DELETE_CHILD;
+	return ret;
+}
+
+/* XXX: modify functions to return NFS errors; they're only ever
+ * used by nfs code, after all.... */
+
+static int
+mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags)
+{
+	u32 ignore = 0;
+
+	if (!(flags & NFS4_ACL_DIR))
+		ignore |= NFS4_ACE_DELETE_CHILD; /* ignore it */
+	perm |= ignore;
+	*mode = 0;
+	if ((perm & NFS4_READ_MODE) == NFS4_READ_MODE)
+		*mode |= ACL_READ;
+	if ((perm & NFS4_WRITE_MODE) == NFS4_WRITE_MODE)
+		*mode |= ACL_WRITE;
+	if ((perm & NFS4_EXECUTE_MODE) == NFS4_EXECUTE_MODE)
+		*mode |= ACL_EXECUTE;
+	if (!MASK_EQUAL(perm, ignore|mask_from_posix(*mode, flags)))
+		return -EINVAL;
+	return 0;
+}
+
+struct ace_container {
+	struct nfs4_ace  *ace;
+	struct list_head  ace_l;
+};
+
+static short ace2type(struct nfs4_ace *);
+static int _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int);
+static struct posix_acl *_nfsv4_to_posix_one(struct nfs4_acl *, unsigned int);
+int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t);
+int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *);
+
+struct nfs4_acl *
+nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl,
+			unsigned int flags)
+{
+	struct nfs4_acl *acl;
+	int error = -EINVAL;
+
+	if ((pacl != NULL &&
+		(posix_acl_valid(pacl) < 0 || pacl->a_count == 0)) ||
+	    (dpacl != NULL &&
+		(posix_acl_valid(dpacl) < 0 || dpacl->a_count == 0)))
+		goto out_err;
+
+	acl = nfs4_acl_new();
+	if (acl == NULL) {
+		error = -ENOMEM;
+		goto out_err;
+	}
+
+	if (pacl != NULL) {
+		error = _posix_to_nfsv4_one(pacl, acl,
+						flags & ~NFS4_ACL_TYPE_DEFAULT);
+		if (error < 0)
+			goto out_acl;
+	}
+
+	if (dpacl != NULL) {
+		error = _posix_to_nfsv4_one(dpacl, acl,
+						flags | NFS4_ACL_TYPE_DEFAULT);
+		if (error < 0)
+			goto out_acl;
+	}
+
+	return acl;
+
+out_acl:
+	nfs4_acl_free(acl);
+out_err:
+	acl = ERR_PTR(error);
+
+	return acl;
+}
+
+static int
+nfs4_acl_add_pair(struct nfs4_acl *acl, int eflag, u32 mask, int whotype,
+		uid_t owner, unsigned int flags)
+{
+	int error;
+
+	error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE,
+				 eflag, mask, whotype, owner);
+	if (error < 0)
+		return error;
+	error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
+				eflag, deny_mask(mask, flags), whotype, owner);
+	return error;
+}
+
+/* We assume the acl has been verified with posix_acl_valid. */
+static int
+_posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
+						unsigned int flags)
+{
+	struct posix_acl_entry *pa, *pe, *group_owner_entry;
+	int error = -EINVAL;
+	u32 mask, mask_mask;
+	int eflag = ((flags & NFS4_ACL_TYPE_DEFAULT) ?
+					NFS4_INHERITANCE_FLAGS : 0);
+
+	BUG_ON(pacl->a_count < 3);
+	pe = pacl->a_entries + pacl->a_count;
+	pa = pe - 2; /* if mask entry exists, it's second from the last. */
+	if (pa->e_tag == ACL_MASK)
+		mask_mask = deny_mask(mask_from_posix(pa->e_perm, flags), flags);
+	else
+		mask_mask = 0;
+
+	pa = pacl->a_entries;
+	BUG_ON(pa->e_tag != ACL_USER_OBJ);
+	mask = mask_from_posix(pa->e_perm, flags | NFS4_ACL_OWNER);
+	error = nfs4_acl_add_pair(acl, eflag, mask, NFS4_ACL_WHO_OWNER, 0, flags);
+	if (error < 0)
+		goto out;
+	pa++;
+
+	while (pa->e_tag == ACL_USER) {
+		mask = mask_from_posix(pa->e_perm, flags);
+		error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
+				eflag,  mask_mask, NFS4_ACL_WHO_NAMED, pa->e_id);
+		if (error < 0)
+			goto out;
+
+
+		error = nfs4_acl_add_pair(acl, eflag, mask,
+				NFS4_ACL_WHO_NAMED, pa->e_id, flags);
+		if (error < 0)
+			goto out;
+		pa++;
+	}
+
+	/* In the case of groups, we apply allow ACEs first, then deny ACEs,
+	 * since a user can be in more than one group.  */
+
+	/* allow ACEs */
+
+	if (pacl->a_count > 3) {
+		BUG_ON(pa->e_tag != ACL_GROUP_OBJ);
+		error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
+				NFS4_ACE_IDENTIFIER_GROUP | eflag, mask_mask,
+				NFS4_ACL_WHO_GROUP, 0);
+		if (error < 0)
+			goto out;
+	}
+	group_owner_entry = pa;
+	mask = mask_from_posix(pa->e_perm, flags);
+	error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE,
+			NFS4_ACE_IDENTIFIER_GROUP | eflag, mask,
+			NFS4_ACL_WHO_GROUP, 0);
+	if (error < 0)
+		goto out;
+	pa++;
+
+	while (pa->e_tag == ACL_GROUP) {
+		mask = mask_from_posix(pa->e_perm, flags);
+		error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
+				NFS4_ACE_IDENTIFIER_GROUP | eflag, mask_mask,
+				NFS4_ACL_WHO_NAMED, pa->e_id);
+		if (error < 0)
+			goto out;
+
+		error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE,
+		    		NFS4_ACE_IDENTIFIER_GROUP | eflag, mask,
+		    		NFS4_ACL_WHO_NAMED, pa->e_id);
+		if (error < 0)
+			goto out;
+		pa++;
+	}
+
+	/* deny ACEs */
+
+	pa = group_owner_entry;
+	mask = mask_from_posix(pa->e_perm, flags);
+	error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
+			NFS4_ACE_IDENTIFIER_GROUP | eflag,
+			deny_mask(mask, flags), NFS4_ACL_WHO_GROUP, 0);
+	if (error < 0)
+		goto out;
+	pa++;
+	while (pa->e_tag == ACL_GROUP) {
+		mask = mask_from_posix(pa->e_perm, flags);
+		error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
+		    		NFS4_ACE_IDENTIFIER_GROUP | eflag,
+		    		deny_mask(mask, flags), NFS4_ACL_WHO_NAMED, pa->e_id);
+		if (error < 0)
+			goto out;
+		pa++;
+	}
+
+	if (pa->e_tag == ACL_MASK)
+		pa++;
+	BUG_ON(pa->e_tag != ACL_OTHER);
+	mask = mask_from_posix(pa->e_perm, flags);
+	error = nfs4_acl_add_pair(acl, eflag, mask, NFS4_ACL_WHO_EVERYONE, 0, flags);
+
+out:
+	return error;
+}
+
+static void
+sort_pacl_range(struct posix_acl *pacl, int start, int end) {
+	int sorted = 0, i;
+	struct posix_acl_entry tmp;
+
+	/* We just do a bubble sort; easy to do in place, and we're not
+	 * expecting acl's to be long enough to justify anything more. */
+	while (!sorted) {
+		sorted = 1;
+		for (i = start; i < end; i++) {
+			if (pacl->a_entries[i].e_id
+					> pacl->a_entries[i+1].e_id) {
+				sorted = 0;
+				tmp = pacl->a_entries[i];
+				pacl->a_entries[i] = pacl->a_entries[i+1];
+				pacl->a_entries[i+1] = tmp;
+			}
+		}
+	}
+}
+
+static void
+sort_pacl(struct posix_acl *pacl)
+{
+	/* posix_acl_valid requires that users and groups be in order
+	 * by uid/gid. */
+	int i, j;
+
+	if (pacl->a_count <= 4)
+		return; /* no users or groups */
+	i = 1;
+	while (pacl->a_entries[i].e_tag == ACL_USER)
+		i++;
+	sort_pacl_range(pacl, 1, i-1);
+
+	BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ);
+	j = i++;
+	while (pacl->a_entries[j].e_tag == ACL_GROUP)
+		j++;
+	sort_pacl_range(pacl, i, j-1);
+	return;
+}
+
+static int
+write_pace(struct nfs4_ace *ace, struct posix_acl *pacl,
+		struct posix_acl_entry **pace, short tag, unsigned int flags)
+{
+	struct posix_acl_entry *this = *pace;
+
+	if (*pace == pacl->a_entries + pacl->a_count)
+		return -EINVAL; /* fell off the end */
+	(*pace)++;
+	this->e_tag = tag;
+	if (tag == ACL_USER_OBJ)
+		flags |= NFS4_ACL_OWNER;
+	if (mode_from_nfs4(ace->access_mask, &this->e_perm, flags))
+		return -EINVAL;
+	this->e_id = (tag == ACL_USER || tag == ACL_GROUP ?
+			ace->who : ACL_UNDEFINED_ID);
+	return 0;
+}
+
+static struct nfs4_ace *
+get_next_v4_ace(struct list_head **p, struct list_head *head)
+{
+	struct nfs4_ace *ace;
+
+	*p = (*p)->next;
+	if (*p == head)
+		return NULL;
+	ace = list_entry(*p, struct nfs4_ace, l_ace);
+
+	return ace;
+}
+
+int
+nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl,
+		struct posix_acl **dpacl, unsigned int flags)
+{
+	struct nfs4_acl *dacl;
+	int error = -ENOMEM;
+
+	*pacl = NULL;
+	*dpacl = NULL;
+
+	dacl = nfs4_acl_new();
+	if (dacl == NULL)
+		goto out;
+
+	error = nfs4_acl_split(acl, dacl);
+	if (error < 0)
+		goto out_acl;
+
+	if (pacl != NULL) {
+		if (acl->naces == 0) {
+			error = -ENODATA;
+			goto try_dpacl;
+		}
+
+		*pacl = _nfsv4_to_posix_one(acl, flags);
+		if (IS_ERR(*pacl)) {
+			error = PTR_ERR(*pacl);
+			*pacl = NULL;
+			goto out_acl;
+		}
+	}
+
+try_dpacl:
+	if (dpacl != NULL) {
+		if (dacl->naces == 0) {
+			if (pacl == NULL || *pacl == NULL)
+				error = -ENODATA;
+			goto out_acl;
+		}
+
+		error = 0;
+		*dpacl = _nfsv4_to_posix_one(dacl, flags);
+		if (IS_ERR(*dpacl)) {
+			error = PTR_ERR(*dpacl);
+			*dpacl = NULL;
+			goto out_acl;
+		}
+	}
+
+out_acl:
+	if (error && pacl) {
+		posix_acl_release(*pacl);
+		*pacl = NULL;
+	}
+	nfs4_acl_free(dacl);
+out:
+	return error;
+}
+
+static int
+same_who(struct nfs4_ace *a, struct nfs4_ace *b)
+{
+	return a->whotype == b->whotype &&
+		(a->whotype != NFS4_ACL_WHO_NAMED || a->who == b->who);
+}
+
+static int
+complementary_ace_pair(struct nfs4_ace *allow, struct nfs4_ace *deny,
+		unsigned int flags)
+{
+	int ignore = 0;
+	if (!(flags & NFS4_ACL_DIR))
+		ignore |= NFS4_ACE_DELETE_CHILD;
+	return MASK_EQUAL(ignore|deny_mask(allow->access_mask, flags),
+			  ignore|deny->access_mask) &&
+		allow->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE &&
+		deny->type == NFS4_ACE_ACCESS_DENIED_ACE_TYPE &&
+		allow->flag == deny->flag &&
+		same_who(allow, deny);
+}
+
+static inline int
+user_obj_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
+		struct posix_acl *pacl, struct posix_acl_entry **pace,
+		unsigned int flags)
+{
+	int error = -EINVAL;
+	struct nfs4_ace *ace, *ace2;
+
+	ace = get_next_v4_ace(p, &n4acl->ace_head);
+	if (ace == NULL)
+		goto out;
+	if (ace2type(ace) != ACL_USER_OBJ)
+		goto out;
+	error = write_pace(ace, pacl, pace, ACL_USER_OBJ, flags);
+	if (error < 0)
+		goto out;
+	error = -EINVAL;
+	ace2 = get_next_v4_ace(p, &n4acl->ace_head);
+	if (ace2 == NULL)
+		goto out;
+	if (!complementary_ace_pair(ace, ace2, flags))
+		goto out;
+	error = 0;
+out:
+	return error;
+}
+
+static inline int
+users_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
+		struct nfs4_ace **mask_ace,
+		struct posix_acl *pacl, struct posix_acl_entry **pace,
+		unsigned int flags)
+{
+	int error = -EINVAL;
+	struct nfs4_ace *ace, *ace2;
+
+	ace = get_next_v4_ace(p, &n4acl->ace_head);
+	if (ace == NULL)
+		goto out;
+	while (ace2type(ace) == ACL_USER) {
+		if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE)
+			goto out;
+		if (*mask_ace &&
+			!MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
+			goto out;
+		*mask_ace = ace;
+		ace = get_next_v4_ace(p, &n4acl->ace_head);
+		if (ace == NULL)
+			goto out;
+		if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
+			goto out;
+		error = write_pace(ace, pacl, pace, ACL_USER, flags);
+		if (error < 0)
+			goto out;
+		error = -EINVAL;
+		ace2 = get_next_v4_ace(p, &n4acl->ace_head);
+		if (ace2 == NULL)
+			goto out;
+		if (!complementary_ace_pair(ace, ace2, flags))
+			goto out;
+		if ((*mask_ace)->flag != ace2->flag ||
+				!same_who(*mask_ace, ace2))
+			goto out;
+		ace = get_next_v4_ace(p, &n4acl->ace_head);
+		if (ace == NULL)
+			goto out;
+	}
+	error = 0;
+out:
+	return error;
+}
+
+static inline int
+group_obj_and_groups_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
+		struct nfs4_ace **mask_ace,
+		struct posix_acl *pacl, struct posix_acl_entry **pace,
+		unsigned int flags)
+{
+	int error = -EINVAL;
+	struct nfs4_ace *ace, *ace2;
+	struct ace_container *ac;
+	struct list_head group_l;
+
+	INIT_LIST_HEAD(&group_l);
+	ace = list_entry(*p, struct nfs4_ace, l_ace);
+
+	/* group owner (mask and allow aces) */
+
+	if (pacl->a_count != 3) {
+		/* then the group owner should be preceded by mask */
+		if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE)
+			goto out;
+		if (*mask_ace &&
+			!MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
+			goto out;
+		*mask_ace = ace;
+		ace = get_next_v4_ace(p, &n4acl->ace_head);
+		if (ace == NULL)
+			goto out;
+
+		if ((*mask_ace)->flag != ace->flag || !same_who(*mask_ace, ace))
+			goto out;
+	}
+
+	if (ace2type(ace) != ACL_GROUP_OBJ)
+		goto out;
+
+	ac = kmalloc(sizeof(*ac), GFP_KERNEL);
+	error = -ENOMEM;
+	if (ac == NULL)
+		goto out;
+	ac->ace = ace;
+	list_add_tail(&ac->ace_l, &group_l);
+
+	error = -EINVAL;
+	if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
+		goto out;
+
+	error = write_pace(ace, pacl, pace, ACL_GROUP_OBJ, flags);
+	if (error < 0)
+		goto out;
+
+	error = -EINVAL;
+	ace = get_next_v4_ace(p, &n4acl->ace_head);
+	if (ace == NULL)
+		goto out;
+
+	/* groups (mask and allow aces) */
+
+	while (ace2type(ace) == ACL_GROUP) {
+		if (*mask_ace == NULL)
+			goto out;
+
+		if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE ||
+			!MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
+			goto out;
+		*mask_ace = ace;
+
+		ace = get_next_v4_ace(p, &n4acl->ace_head);
+		if (ace == NULL)
+			goto out;
+		ac = kmalloc(sizeof(*ac), GFP_KERNEL);
+		error = -ENOMEM;
+		if (ac == NULL)
+			goto out;
+		error = -EINVAL;
+		if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE ||
+				!same_who(ace, *mask_ace))
+			goto out;
+
+		ac->ace = ace;
+		list_add_tail(&ac->ace_l, &group_l);
+
+		error = write_pace(ace, pacl, pace, ACL_GROUP, flags);
+		if (error < 0)
+			goto out;
+		error = -EINVAL;
+		ace = get_next_v4_ace(p, &n4acl->ace_head);
+		if (ace == NULL)
+			goto out;
+	}
+
+	/* group owner (deny ace) */
+
+	if (ace2type(ace) != ACL_GROUP_OBJ)
+		goto out;
+	ac = list_entry(group_l.next, struct ace_container, ace_l);
+	ace2 = ac->ace;
+	if (!complementary_ace_pair(ace2, ace, flags))
+		goto out;
+	list_del(group_l.next);
+	kfree(ac);
+
+	/* groups (deny aces) */
+
+	while (!list_empty(&group_l)) {
+		ace = get_next_v4_ace(p, &n4acl->ace_head);
+		if (ace == NULL)
+			goto out;
+		if (ace2type(ace) != ACL_GROUP)
+			goto out;
+		ac = list_entry(group_l.next, struct ace_container, ace_l);
+		ace2 = ac->ace;
+		if (!complementary_ace_pair(ace2, ace, flags))
+			goto out;
+		list_del(group_l.next);
+		kfree(ac);
+	}
+
+	ace = get_next_v4_ace(p, &n4acl->ace_head);
+	if (ace == NULL)
+		goto out;
+	if (ace2type(ace) != ACL_OTHER)
+		goto out;
+	error = 0;
+out:
+	while (!list_empty(&group_l)) {
+		ac = list_entry(group_l.next, struct ace_container, ace_l);
+		list_del(group_l.next);
+		kfree(ac);
+	}
+	return error;
+}
+
+static inline int
+mask_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
+		struct nfs4_ace **mask_ace,
+		struct posix_acl *pacl, struct posix_acl_entry **pace,
+		unsigned int flags)
+{
+	int error = -EINVAL;
+	struct nfs4_ace *ace;
+
+	ace = list_entry(*p, struct nfs4_ace, l_ace);
+	if (pacl->a_count != 3) {
+		if (*mask_ace == NULL)
+			goto out;
+		(*mask_ace)->access_mask = deny_mask((*mask_ace)->access_mask, flags);
+		write_pace(*mask_ace, pacl, pace, ACL_MASK, flags);
+	}
+	error = 0;
+out:
+	return error;
+}
+
+static inline int
+other_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
+		struct posix_acl *pacl, struct posix_acl_entry **pace,
+		unsigned int flags)
+{
+	int error = -EINVAL;
+	struct nfs4_ace *ace, *ace2;
+
+	ace = list_entry(*p, struct nfs4_ace, l_ace);
+	if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
+		goto out;
+	error = write_pace(ace, pacl, pace, ACL_OTHER, flags);
+	if (error < 0)
+		goto out;
+	error = -EINVAL;
+	ace2 = get_next_v4_ace(p, &n4acl->ace_head);
+	if (ace2 == NULL)
+		goto out;
+	if (!complementary_ace_pair(ace, ace2, flags))
+		goto out;
+	error = 0;
+out:
+	return error;
+}
+
+static int
+calculate_posix_ace_count(struct nfs4_acl *n4acl)
+{
+	if (n4acl->naces == 6) /* owner, owner group, and other only */
+		return 3;
+	else { /* Otherwise there must be a mask entry. */
+		/* Also, the remaining entries are for named users and
+		 * groups, and come in threes (mask, allow, deny): */
+		if (n4acl->naces < 7)
+			return -1;
+		if ((n4acl->naces - 7) % 3)
+			return -1;
+		return 4 + (n4acl->naces - 7)/3;
+	}
+}
+
+
+static struct posix_acl *
+_nfsv4_to_posix_one(struct nfs4_acl *n4acl, unsigned int flags)
+{
+	struct posix_acl *pacl;
+	int error = -EINVAL, nace = 0;
+	struct list_head *p;
+	struct nfs4_ace *mask_ace = NULL;
+	struct posix_acl_entry *pace;
+
+	nace = calculate_posix_ace_count(n4acl);
+	if (nace < 0)
+		goto out_err;
+
+	pacl = posix_acl_alloc(nace, GFP_KERNEL);
+	error = -ENOMEM;
+	if (pacl == NULL)
+		goto out_err;
+
+	pace = &pacl->a_entries[0];
+	p = &n4acl->ace_head;
+
+	error = user_obj_from_v4(n4acl, &p, pacl, &pace, flags);
+	if (error)
+		goto out_acl;
+
+	error = users_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags);
+	if (error)
+		goto out_acl;
+
+	error = group_obj_and_groups_from_v4(n4acl, &p, &mask_ace, pacl, &pace,
+						flags);
+	if (error)
+		goto out_acl;
+
+	error = mask_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags);
+	if (error)
+		goto out_acl;
+	error = other_from_v4(n4acl, &p, pacl, &pace, flags);
+	if (error)
+		goto out_acl;
+
+	error = -EINVAL;
+	if (p->next != &n4acl->ace_head)
+		goto out_acl;
+	if (pace != pacl->a_entries + pacl->a_count)
+		goto out_acl;
+
+	sort_pacl(pacl);
+
+	return pacl;
+out_acl:
+	posix_acl_release(pacl);
+out_err:
+	pacl = ERR_PTR(error);
+	return pacl;
+}
+
+int
+nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
+{
+	struct list_head *h, *n;
+	struct nfs4_ace *ace;
+	int error = 0;
+
+	list_for_each_safe(h, n, &acl->ace_head) {
+		ace = list_entry(h, struct nfs4_ace, l_ace);
+
+		if ((ace->flag & NFS4_INHERITANCE_FLAGS)
+				!= NFS4_INHERITANCE_FLAGS)
+			continue;
+
+		error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
+				ace->access_mask, ace->whotype, ace->who) == -1;
+		if (error < 0)
+			goto out;
+
+		list_del(h);
+		kfree(ace);
+		acl->naces--;
+	}
+
+out:
+	return error;
+}
+
+static short
+ace2type(struct nfs4_ace *ace)
+{
+	switch (ace->whotype) {
+		case NFS4_ACL_WHO_NAMED:
+			return (ace->flag & NFS4_ACE_IDENTIFIER_GROUP ?
+					ACL_GROUP : ACL_USER);
+		case NFS4_ACL_WHO_OWNER:
+			return ACL_USER_OBJ;
+		case NFS4_ACL_WHO_GROUP:
+			return ACL_GROUP_OBJ;
+		case NFS4_ACL_WHO_EVERYONE:
+			return ACL_OTHER;
+	}
+	BUG();
+	return -1;
+}
+
+EXPORT_SYMBOL(nfs4_acl_posix_to_nfsv4);
+EXPORT_SYMBOL(nfs4_acl_nfsv4_to_posix);
+
+struct nfs4_acl *
+nfs4_acl_new(void)
+{
+	struct nfs4_acl *acl;
+
+	if ((acl = kmalloc(sizeof(*acl), GFP_KERNEL)) == NULL)
+		return NULL;
+
+	acl->naces = 0;
+	INIT_LIST_HEAD(&acl->ace_head);
+
+	return acl;
+}
+
+void
+nfs4_acl_free(struct nfs4_acl *acl)
+{
+	struct list_head *h;
+	struct nfs4_ace *ace;
+
+	if (!acl)
+		return;
+
+	while (!list_empty(&acl->ace_head)) {
+		h = acl->ace_head.next;
+		list_del(h);
+		ace = list_entry(h, struct nfs4_ace, l_ace);
+		kfree(ace);
+	}
+
+	kfree(acl);
+
+	return;
+}
+
+int
+nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask,
+		int whotype, uid_t who)
+{
+	struct nfs4_ace *ace;
+
+	if ((ace = kmalloc(sizeof(*ace), GFP_KERNEL)) == NULL)
+		return -1;
+
+	ace->type = type;
+	ace->flag = flag;
+	ace->access_mask = access_mask;
+	ace->whotype = whotype;
+	ace->who = who;
+
+	list_add_tail(&ace->l_ace, &acl->ace_head);
+	acl->naces++;
+
+	return 0;
+}
+
+static struct {
+	char *string;
+	int   stringlen;
+	int type;
+} s2t_map[] = {
+	{
+		.string    = "OWNER@",
+		.stringlen = sizeof("OWNER@") - 1,
+		.type      = NFS4_ACL_WHO_OWNER,
+	},
+	{
+		.string    = "GROUP@",
+		.stringlen = sizeof("GROUP@") - 1,
+		.type      = NFS4_ACL_WHO_GROUP,
+	},
+	{
+		.string    = "EVERYONE@",
+		.stringlen = sizeof("EVERYONE@") - 1,
+		.type      = NFS4_ACL_WHO_EVERYONE,
+	},
+};
+
+int
+nfs4_acl_get_whotype(char *p, u32 len)
+{
+	int i;
+
+	for (i=0; i < sizeof(s2t_map) / sizeof(*s2t_map); i++) {
+		if (s2t_map[i].stringlen == len &&
+				0 == memcmp(s2t_map[i].string, p, len))
+			return s2t_map[i].type;
+	}
+	return NFS4_ACL_WHO_NAMED;
+}
+
+int
+nfs4_acl_write_who(int who, char *p)
+{
+	int i;
+
+	for (i=0; i < sizeof(s2t_map) / sizeof(*s2t_map); i++) {
+		if (s2t_map[i].type == who) {
+			memcpy(p, s2t_map[i].string, s2t_map[i].stringlen);
+			return s2t_map[i].stringlen;
+		}
+	}
+	BUG();
+	return -1;
+}
+
+static inline int
+match_who(struct nfs4_ace *ace, uid_t owner, gid_t group, uid_t who)
+{
+	switch (ace->whotype) {
+		case NFS4_ACL_WHO_NAMED:
+			return who == ace->who;
+		case NFS4_ACL_WHO_OWNER:
+			return who == owner;
+		case NFS4_ACL_WHO_GROUP:
+			return who == group;
+		case NFS4_ACL_WHO_EVERYONE:
+			return 1;
+		default:
+			return 0;
+	}
+}
+
+EXPORT_SYMBOL(nfs4_acl_new);
+EXPORT_SYMBOL(nfs4_acl_free);
+EXPORT_SYMBOL(nfs4_acl_add_ace);
+EXPORT_SYMBOL(nfs4_acl_get_whotype);
+EXPORT_SYMBOL(nfs4_acl_write_who);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
new file mode 100644
index 000000000000..c70de9c2af74
--- /dev/null
+++ b/fs/nfsd/nfs4callback.c
@@ -0,0 +1,547 @@
+/*
+ *  linux/fs/nfsd/nfs4callback.c
+ *
+ *  Copyright (c) 2001 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *  Andy Adamson <andros@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/inet.h>
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/state.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/nfs4.h>
+
+#define NFSDDBG_FACILITY                NFSDDBG_PROC
+
+#define NFSPROC4_CB_NULL 0
+#define NFSPROC4_CB_COMPOUND 1
+
+/* declarations */
+static void nfs4_cb_null(struct rpc_task *task);
+extern spinlock_t recall_lock;
+
+/* Index of predefined Linux callback client operations */
+
+enum {
+        NFSPROC4_CLNT_CB_NULL = 0,
+	NFSPROC4_CLNT_CB_RECALL,
+};
+
+enum nfs_cb_opnum4 {
+	OP_CB_RECALL            = 4,
+};
+
+#define NFS4_MAXTAGLEN		20
+
+#define NFS4_enc_cb_null_sz		0
+#define NFS4_dec_cb_null_sz		0
+#define cb_compound_enc_hdr_sz		4
+#define cb_compound_dec_hdr_sz		(3 + (NFS4_MAXTAGLEN >> 2))
+#define op_enc_sz			1
+#define op_dec_sz			2
+#define enc_nfs4_fh_sz			(1 + (NFS4_FHSIZE >> 2))
+#define enc_stateid_sz			16
+#define NFS4_enc_cb_recall_sz		(cb_compound_enc_hdr_sz +       \
+					1 + enc_stateid_sz +            \
+					enc_nfs4_fh_sz)
+
+#define NFS4_dec_cb_recall_sz		(cb_compound_dec_hdr_sz  +      \
+					op_dec_sz)
+
+/*
+* Generic encode routines from fs/nfs/nfs4xdr.c
+*/
+static inline u32 *
+xdr_writemem(u32 *p, const void *ptr, int nbytes)
+{
+	int tmp = XDR_QUADLEN(nbytes);
+	if (!tmp)
+		return p;
+	p[tmp-1] = 0;
+	memcpy(p, ptr, nbytes);
+	return p + tmp;
+}
+
+#define WRITE32(n)               *p++ = htonl(n)
+#define WRITEMEM(ptr,nbytes)     do {                           \
+	p = xdr_writemem(p, ptr, nbytes);                       \
+} while (0)
+#define RESERVE_SPACE(nbytes)   do {                            \
+	p = xdr_reserve_space(xdr, nbytes);                     \
+	if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \
+	BUG_ON(!p);                                             \
+} while (0)
+
+/*
+ * Generic decode routines from fs/nfs/nfs4xdr.c
+ */
+#define DECODE_TAIL                             \
+	status = 0;                             \
+out:                                            \
+	return status;                          \
+xdr_error:                                      \
+	dprintk("NFSD: xdr error! (%s:%d)\n", __FILE__, __LINE__); \
+	status = -EIO;                          \
+	goto out
+
+#define READ32(x)         (x) = ntohl(*p++)
+#define READ64(x)         do {                  \
+	(x) = (u64)ntohl(*p++) << 32;           \
+	(x) |= ntohl(*p++);                     \
+} while (0)
+#define READTIME(x)       do {                  \
+	p++;                                    \
+	(x.tv_sec) = ntohl(*p++);               \
+	(x.tv_nsec) = ntohl(*p++);              \
+} while (0)
+#define READ_BUF(nbytes)  do { \
+	p = xdr_inline_decode(xdr, nbytes); \
+	if (!p) { \
+		dprintk("NFSD: %s: reply buffer overflowed in line %d.", \
+			__FUNCTION__, __LINE__); \
+		return -EIO; \
+	} \
+} while (0)
+
+struct nfs4_cb_compound_hdr {
+	int		status;
+	u32		ident;
+	u32		nops;
+	u32		taglen;
+	char *		tag;
+};
+
+static struct {
+int stat;
+int errno;
+} nfs_cb_errtbl[] = {
+	{ NFS4_OK,		0               },
+	{ NFS4ERR_PERM,		EPERM           },
+	{ NFS4ERR_NOENT,	ENOENT          },
+	{ NFS4ERR_IO,		EIO             },
+	{ NFS4ERR_NXIO,		ENXIO           },
+	{ NFS4ERR_ACCESS,	EACCES          },
+	{ NFS4ERR_EXIST,	EEXIST          },
+	{ NFS4ERR_XDEV,		EXDEV           },
+	{ NFS4ERR_NOTDIR,	ENOTDIR         },
+	{ NFS4ERR_ISDIR,	EISDIR          },
+	{ NFS4ERR_INVAL,	EINVAL          },
+	{ NFS4ERR_FBIG,		EFBIG           },
+	{ NFS4ERR_NOSPC,	ENOSPC          },
+	{ NFS4ERR_ROFS,		EROFS           },
+	{ NFS4ERR_MLINK,	EMLINK          },
+	{ NFS4ERR_NAMETOOLONG,	ENAMETOOLONG    },
+	{ NFS4ERR_NOTEMPTY,	ENOTEMPTY       },
+	{ NFS4ERR_DQUOT,	EDQUOT          },
+	{ NFS4ERR_STALE,	ESTALE          },
+	{ NFS4ERR_BADHANDLE,	EBADHANDLE      },
+	{ NFS4ERR_BAD_COOKIE,	EBADCOOKIE      },
+	{ NFS4ERR_NOTSUPP,	ENOTSUPP        },
+	{ NFS4ERR_TOOSMALL,	ETOOSMALL       },
+	{ NFS4ERR_SERVERFAULT,	ESERVERFAULT    },
+	{ NFS4ERR_BADTYPE,	EBADTYPE        },
+	{ NFS4ERR_LOCKED,	EAGAIN          },
+	{ NFS4ERR_RESOURCE,	EREMOTEIO       },
+	{ NFS4ERR_SYMLINK,	ELOOP           },
+	{ NFS4ERR_OP_ILLEGAL,	EOPNOTSUPP      },
+	{ NFS4ERR_DEADLOCK,	EDEADLK         },
+	{ -1,                   EIO             }
+};
+
+static int
+nfs_cb_stat_to_errno(int stat)
+{
+	int i;
+	for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) {
+		if (nfs_cb_errtbl[i].stat == stat)
+			return nfs_cb_errtbl[i].errno;
+	}
+	/* If we cannot translate the error, the recovery routines should
+	* handle it.
+	* Note: remaining NFSv4 error codes have values > 10000, so should
+	* not conflict with native Linux error codes.
+	*/
+	return stat;
+}
+
+/*
+ * XDR encode
+ */
+
+static int
+encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
+{
+	u32 * p;
+
+	RESERVE_SPACE(16);
+	WRITE32(0);            /* tag length is always 0 */
+	WRITE32(NFS4_MINOR_VERSION);
+	WRITE32(hdr->ident);
+	WRITE32(hdr->nops);
+	return 0;
+}
+
+static int
+encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec)
+{
+	u32 *p;
+	int len = cb_rec->cbr_fhlen;
+
+	RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len);
+	WRITE32(OP_CB_RECALL);
+	WRITEMEM(&cb_rec->cbr_stateid, sizeof(stateid_t));
+	WRITE32(cb_rec->cbr_trunc);
+	WRITE32(len);
+	WRITEMEM(cb_rec->cbr_fhval, len);
+	return 0;
+}
+
+static int
+nfs4_xdr_enc_cb_null(struct rpc_rqst *req, u32 *p)
+{
+	struct xdr_stream xdrs, *xdr = &xdrs;
+
+	xdr_init_encode(&xdrs, &req->rq_snd_buf, p);
+        RESERVE_SPACE(0);
+	return 0;
+}
+
+static int
+nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, u32 *p, struct nfs4_cb_recall *args)
+{
+	struct xdr_stream xdr;
+	struct nfs4_cb_compound_hdr hdr = {
+		.ident = args->cbr_ident,
+		.nops   = 1,
+	};
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_cb_compound_hdr(&xdr, &hdr);
+	return (encode_cb_recall(&xdr, args));
+}
+
+
+static int
+decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){
+        u32 *p;
+
+        READ_BUF(8);
+        READ32(hdr->status);
+        READ32(hdr->taglen);
+        READ_BUF(hdr->taglen + 4);
+        hdr->tag = (char *)p;
+        p += XDR_QUADLEN(hdr->taglen);
+        READ32(hdr->nops);
+        return 0;
+}
+
+static int
+decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
+{
+	u32 *p;
+	u32 op;
+	int32_t nfserr;
+
+	READ_BUF(8);
+	READ32(op);
+	if (op != expected) {
+		dprintk("NFSD: decode_cb_op_hdr: Callback server returned "
+		         " operation %d but we issued a request for %d\n",
+		         op, expected);
+		return -EIO;
+	}
+	READ32(nfserr);
+	if (nfserr != NFS_OK)
+		return -nfs_cb_stat_to_errno(nfserr);
+	return 0;
+}
+
+static int
+nfs4_xdr_dec_cb_null(struct rpc_rqst *req, u32 *p)
+{
+	return 0;
+}
+
+static int
+nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, u32 *p)
+{
+	struct xdr_stream xdr;
+	struct nfs4_cb_compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_cb_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_cb_op_hdr(&xdr, OP_CB_RECALL);
+out:
+	return status;
+}
+
+/*
+ * RPC procedure tables
+ */
+#ifndef MAX
+# define MAX(a, b)      (((a) > (b))? (a) : (b))
+#endif
+
+#define PROC(proc, call, argtype, restype)                              \
+[NFSPROC4_CLNT_##proc] = {                                      	\
+        .p_proc   = NFSPROC4_CB_##call,					\
+        .p_encode = (kxdrproc_t) nfs4_xdr_##argtype,                    \
+        .p_decode = (kxdrproc_t) nfs4_xdr_##restype,                    \
+        .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2,  \
+}
+
+struct rpc_procinfo     nfs4_cb_procedures[] = {
+    PROC(CB_NULL,      NULL,     enc_cb_null,     dec_cb_null),
+    PROC(CB_RECALL,    COMPOUND,   enc_cb_recall,      dec_cb_recall),
+};
+
+struct rpc_version              nfs_cb_version4 = {
+        .number                 = 1,
+        .nrprocs                = sizeof(nfs4_cb_procedures)/sizeof(nfs4_cb_procedures[0]),
+        .procs                  = nfs4_cb_procedures
+};
+
+static struct rpc_version *	nfs_cb_version[] = {
+	NULL,
+	&nfs_cb_version4,
+};
+
+/*
+ * Use the SETCLIENTID credential
+ */
+struct rpc_cred *
+nfsd4_lookupcred(struct nfs4_client *clp, int taskflags)
+{
+        struct auth_cred acred;
+	struct rpc_clnt *clnt = clp->cl_callback.cb_client;
+	struct rpc_cred *ret;
+
+        get_group_info(clp->cl_cred.cr_group_info);
+        acred.uid = clp->cl_cred.cr_uid;
+        acred.gid = clp->cl_cred.cr_gid;
+        acred.group_info = clp->cl_cred.cr_group_info;
+
+        dprintk("NFSD:     looking up %s cred\n",
+                clnt->cl_auth->au_ops->au_name);
+        ret = rpcauth_lookup_credcache(clnt->cl_auth, &acred, taskflags);
+        put_group_info(clp->cl_cred.cr_group_info);
+        return ret;
+}
+
+/*
+ * Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
+ */
+void
+nfsd4_probe_callback(struct nfs4_client *clp)
+{
+	struct sockaddr_in	addr;
+	struct nfs4_callback    *cb = &clp->cl_callback;
+	struct rpc_timeout	timeparms;
+	struct rpc_xprt *	xprt;
+	struct rpc_program *	program = &cb->cb_program;
+	struct rpc_stat *	stat = &cb->cb_stat;
+	struct rpc_clnt *	clnt;
+	struct rpc_message msg = {
+		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
+		.rpc_argp       = clp,
+	};
+	char                    hostname[32];
+	int status;
+
+	dprintk("NFSD: probe_callback. cb_parsed %d cb_set %d\n",
+			cb->cb_parsed, atomic_read(&cb->cb_set));
+	if (!cb->cb_parsed || atomic_read(&cb->cb_set))
+		return;
+
+	/* Initialize address */
+	memset(&addr, 0, sizeof(addr));
+	addr.sin_family = AF_INET;
+	addr.sin_port = htons(cb->cb_port);
+	addr.sin_addr.s_addr = htonl(cb->cb_addr);
+
+	/* Initialize timeout */
+	timeparms.to_initval = (NFSD_LEASE_TIME/4) * HZ;
+	timeparms.to_retries = 0;
+	timeparms.to_maxval = (NFSD_LEASE_TIME/2) * HZ;
+	timeparms.to_exponential = 1;
+
+	/* Create RPC transport */
+	if (!(xprt = xprt_create_proto(IPPROTO_TCP, &addr, &timeparms))) {
+		dprintk("NFSD: couldn't create callback transport!\n");
+		goto out_err;
+	}
+
+	/* Initialize rpc_program */
+	program->name = "nfs4_cb";
+	program->number = cb->cb_prog;
+	program->nrvers = sizeof(nfs_cb_version)/sizeof(nfs_cb_version[0]);
+	program->version = nfs_cb_version;
+	program->stats = stat;
+
+	/* Initialize rpc_stat */
+	memset(stat, 0, sizeof(struct rpc_stat));
+	stat->program = program;
+
+	/* Create RPC client
+ 	 *
+	 * XXX AUTH_UNIX only - need AUTH_GSS....
+	 */
+	sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr));
+	if (!(clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX))) {
+		dprintk("NFSD: couldn't create callback client\n");
+		goto out_xprt;
+	}
+	clnt->cl_intr = 0;
+	clnt->cl_softrtry = 1;
+	clnt->cl_chatty = 1;
+
+	/* Kick rpciod, put the call on the wire. */
+
+	if (rpciod_up() != 0) {
+		dprintk("nfsd: couldn't start rpciod for callbacks!\n");
+		goto out_clnt;
+	}
+
+	/* the task holds a reference to the nfs4_client struct */
+	cb->cb_client = clnt;
+	atomic_inc(&clp->cl_count);
+
+	msg.rpc_cred = nfsd4_lookupcred(clp,0);
+	if (IS_ERR(msg.rpc_cred))
+		goto out_rpciod;
+	status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, nfs4_cb_null, NULL);
+	put_rpccred(msg.rpc_cred);
+
+	if (status != 0) {
+		dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n");
+		goto out_rpciod;
+	}
+	return;
+
+out_rpciod:
+	atomic_dec(&clp->cl_count);
+	rpciod_down();
+out_clnt:
+	rpc_shutdown_client(clnt);
+	goto out_err;
+out_xprt:
+	xprt_destroy(xprt);
+out_err:
+	dprintk("NFSD: warning: no callback path to client %.*s\n",
+		(int)clp->cl_name.len, clp->cl_name.data);
+	cb->cb_client = NULL;
+}
+
+static void
+nfs4_cb_null(struct rpc_task *task)
+{
+	struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp;
+	struct nfs4_callback *cb = &clp->cl_callback;
+	u32 addr = htonl(cb->cb_addr);
+
+	dprintk("NFSD: nfs4_cb_null task->tk_status %d\n", task->tk_status);
+
+	if (task->tk_status < 0) {
+		dprintk("NFSD: callback establishment to client %.*s failed\n",
+			(int)clp->cl_name.len, clp->cl_name.data);
+		goto out;
+	}
+	atomic_set(&cb->cb_set, 1);
+	dprintk("NFSD: callback set to client %u.%u.%u.%u\n", NIPQUAD(addr));
+out:
+	put_nfs4_client(clp);
+}
+
+/*
+ * called with dp->dl_count inc'ed.
+ * nfs4_lock_state() may or may not have been called.
+ */
+void
+nfsd4_cb_recall(struct nfs4_delegation *dp)
+{
+	struct nfs4_client *clp = dp->dl_client;
+	struct rpc_clnt *clnt = clp->cl_callback.cb_client;
+	struct nfs4_cb_recall *cbr = &dp->dl_recall;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
+		.rpc_argp = cbr,
+	};
+	int retries = 1;
+	int status = 0;
+
+	if ((!atomic_read(&clp->cl_callback.cb_set)) || !clnt)
+		return;
+
+	msg.rpc_cred = nfsd4_lookupcred(clp, 0);
+	if (IS_ERR(msg.rpc_cred))
+		goto out;
+
+	cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */
+	cbr->cbr_dp = dp;
+
+	status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT);
+	while (retries--) {
+		switch (status) {
+			case -EIO:
+				/* Network partition? */
+			case -EBADHANDLE:
+			case -NFS4ERR_BAD_STATEID:
+				/* Race: client probably got cb_recall
+				 * before open reply granting delegation */
+				break;
+			default:
+				goto out_put_cred;
+		}
+		ssleep(2);
+		status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT);
+	}
+out_put_cred:
+	put_rpccred(msg.rpc_cred);
+out:
+	if (status == -EIO)
+		atomic_set(&clp->cl_callback.cb_set, 0);
+	/* Success or failure, now we're either waiting for lease expiration
+	 * or deleg_return. */
+	dprintk("NFSD: nfs4_cb_recall: dp %p dl_flock %p dl_count %d\n",dp, dp->dl_flock, atomic_read(&dp->dl_count));
+	nfs4_put_delegation(dp);
+	return;
+}
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
new file mode 100644
index 000000000000..4ba540841cf6
--- /dev/null
+++ b/fs/nfsd/nfs4idmap.c
@@ -0,0 +1,588 @@
+/*
+ *  fs/nfsd/nfs4idmap.c
+ *
+ *  Mapping of UID/GIDs to name and vice versa.
+ *
+ *  Copyright (c) 2002, 2003 The Regents of the University of
+ *  Michigan.  All rights reserved.
+ *
+ *  Marius Aamodt Eriksen <marius@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/smp_lock.h>
+#include <linux/sunrpc/cache.h>
+#include <linux/nfsd_idmap.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/seq_file.h>
+#include <linux/sunrpc/svcauth.h>
+
+/*
+ * Cache entry
+ */
+
+/*
+ * XXX we know that IDMAP_NAMESZ < PAGE_SIZE, but it's ugly to rely on
+ * that.
+ */
+
+#define IDMAP_TYPE_USER  0
+#define IDMAP_TYPE_GROUP 1
+
+struct ent {
+	struct cache_head h;
+	int               type;		       /* User / Group */
+	uid_t             id;
+	char              name[IDMAP_NAMESZ];
+	char              authname[IDMAP_NAMESZ];
+};
+
+#define DefineSimpleCacheLookupMap(STRUCT, FUNC)			\
+        DefineCacheLookup(struct STRUCT, h, FUNC##_lookup,		\
+        (struct STRUCT *item, int set), /*no setup */,			\
+	& FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp),	\
+	STRUCT##_init(new, item), STRUCT##_update(tmp, item), 0)
+
+/* Common entry handling */
+
+#define ENT_HASHBITS          8
+#define ENT_HASHMAX           (1 << ENT_HASHBITS)
+#define ENT_HASHMASK          (ENT_HASHMAX - 1)
+
+static inline void
+ent_init(struct ent *new, struct ent *itm)
+{
+	new->id = itm->id;
+	new->type = itm->type;
+
+	strlcpy(new->name, itm->name, sizeof(new->name));
+	strlcpy(new->authname, itm->authname, sizeof(new->name));
+}
+
+static inline void
+ent_update(struct ent *new, struct ent *itm)
+{
+	ent_init(new, itm);
+}
+
+void
+ent_put(struct cache_head *ch, struct cache_detail *cd)
+{
+	if (cache_put(ch, cd)) {
+		struct ent *map = container_of(ch, struct ent, h);
+		kfree(map);
+	}
+}
+
+/*
+ * ID -> Name cache
+ */
+
+static struct cache_head *idtoname_table[ENT_HASHMAX];
+
+static uint32_t
+idtoname_hash(struct ent *ent)
+{
+	uint32_t hash;
+
+	hash = hash_str(ent->authname, ENT_HASHBITS);
+	hash = hash_long(hash ^ ent->id, ENT_HASHBITS);
+
+	/* Flip LSB for user/group */
+	if (ent->type == IDMAP_TYPE_GROUP)
+		hash ^= 1;
+
+	return hash;
+}
+
+static void
+idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
+    int *blen)
+{
+ 	struct ent *ent = container_of(ch, struct ent, h);
+	char idstr[11];
+
+	qword_add(bpp, blen, ent->authname);
+	snprintf(idstr, sizeof(idstr), "%d", ent->id);
+	qword_add(bpp, blen, ent->type == IDMAP_TYPE_GROUP ? "group" : "user");
+	qword_add(bpp, blen, idstr);
+
+	(*bpp)[-1] = '\n';
+}
+
+static inline int
+idtoname_match(struct ent *a, struct ent *b)
+{
+	return (a->id == b->id && a->type == b->type &&
+	    strcmp(a->authname, b->authname) == 0);
+}
+
+static int
+idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
+{
+	struct ent *ent;
+
+	if (h == NULL) {
+		seq_puts(m, "#domain type id [name]\n");
+		return 0;
+	}
+	ent = container_of(h, struct ent, h);
+	seq_printf(m, "%s %s %d", ent->authname,
+			ent->type == IDMAP_TYPE_GROUP ? "group" : "user",
+			ent->id);
+	if (test_bit(CACHE_VALID, &h->flags))
+		seq_printf(m, " %s", ent->name);
+	seq_printf(m, "\n");
+	return 0;
+}
+
+static void
+warn_no_idmapd(struct cache_detail *detail)
+{
+	printk("nfsd: nfsv4 idmapping failing: has idmapd %s?\n",
+			detail->last_close? "died" : "not been started");
+}
+
+
+static int         idtoname_parse(struct cache_detail *, char *, int);
+static struct ent *idtoname_lookup(struct ent *, int);
+
+struct cache_detail idtoname_cache = {
+	.hash_size	= ENT_HASHMAX,
+	.hash_table	= idtoname_table,
+	.name		= "nfs4.idtoname",
+	.cache_put	= ent_put,
+	.cache_request	= idtoname_request,
+	.cache_parse	= idtoname_parse,
+	.cache_show	= idtoname_show,
+	.warn_no_listener = warn_no_idmapd,
+};
+
+int
+idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
+{
+	struct ent ent, *res;
+	char *buf1, *bp;
+	int error = -EINVAL;
+
+	if (buf[buflen - 1] != '\n')
+		return (-EINVAL);
+	buf[buflen - 1]= '\0';
+
+	buf1 = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (buf1 == NULL)
+		return (-ENOMEM);
+
+	memset(&ent, 0, sizeof(ent));
+
+	/* Authentication name */
+	if (qword_get(&buf, buf1, PAGE_SIZE) <= 0)
+		goto out;
+	memcpy(ent.authname, buf1, sizeof(ent.authname));
+
+	/* Type */
+	if (qword_get(&buf, buf1, PAGE_SIZE) <= 0)
+		goto out;
+	ent.type = strcmp(buf1, "user") == 0 ?
+		IDMAP_TYPE_USER : IDMAP_TYPE_GROUP;
+
+	/* ID */
+	if (qword_get(&buf, buf1, PAGE_SIZE) <= 0)
+		goto out;
+	ent.id = simple_strtoul(buf1, &bp, 10);
+	if (bp == buf1)
+		goto out;
+
+	/* expiry */
+	ent.h.expiry_time = get_expiry(&buf);
+	if (ent.h.expiry_time == 0)
+		goto out;
+
+	/* Name */
+	error = qword_get(&buf, buf1, PAGE_SIZE);
+	if (error == -EINVAL)
+		goto out;
+	if (error == -ENOENT)
+		set_bit(CACHE_NEGATIVE, &ent.h.flags);
+	else {
+		if (error >= IDMAP_NAMESZ) {
+			error = -EINVAL;
+			goto out;
+		}
+		memcpy(ent.name, buf1, sizeof(ent.name));
+	}
+	error = -ENOMEM;
+	if ((res = idtoname_lookup(&ent, 1)) == NULL)
+		goto out;
+
+	ent_put(&res->h, &idtoname_cache);
+
+	error = 0;
+out:
+	kfree(buf1);
+
+	return error;
+}
+
+static DefineSimpleCacheLookupMap(ent, idtoname);
+
+/*
+ * Name -> ID cache
+ */
+
+static struct cache_head *nametoid_table[ENT_HASHMAX];
+
+static inline int
+nametoid_hash(struct ent *ent)
+{
+	return hash_str(ent->name, ENT_HASHBITS);
+}
+
+void
+nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
+    int *blen)
+{
+ 	struct ent *ent = container_of(ch, struct ent, h);
+
+	qword_add(bpp, blen, ent->authname);
+	qword_add(bpp, blen, ent->type == IDMAP_TYPE_GROUP ? "group" : "user");
+	qword_add(bpp, blen, ent->name);
+
+	(*bpp)[-1] = '\n';
+}
+
+static inline int
+nametoid_match(struct ent *a, struct ent *b)
+{
+	return (a->type == b->type && strcmp(a->name, b->name) == 0 &&
+	    strcmp(a->authname, b->authname) == 0);
+}
+
+static int
+nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
+{
+	struct ent *ent;
+
+	if (h == NULL) {
+		seq_puts(m, "#domain type name [id]\n");
+		return 0;
+	}
+	ent = container_of(h, struct ent, h);
+	seq_printf(m, "%s %s %s", ent->authname,
+			ent->type == IDMAP_TYPE_GROUP ? "group" : "user",
+			ent->name);
+	if (test_bit(CACHE_VALID, &h->flags))
+		seq_printf(m, " %d", ent->id);
+	seq_printf(m, "\n");
+	return 0;
+}
+
+static struct ent *nametoid_lookup(struct ent *, int);
+int                nametoid_parse(struct cache_detail *, char *, int);
+
+struct cache_detail nametoid_cache = {
+	.hash_size	= ENT_HASHMAX,
+	.hash_table	= nametoid_table,
+	.name		= "nfs4.nametoid",
+	.cache_put	= ent_put,
+	.cache_request	= nametoid_request,
+	.cache_parse	= nametoid_parse,
+	.cache_show	= nametoid_show,
+	.warn_no_listener = warn_no_idmapd,
+};
+
+int
+nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
+{
+	struct ent ent, *res;
+	char *buf1;
+	int error = -EINVAL;
+
+	if (buf[buflen - 1] != '\n')
+		return (-EINVAL);
+	buf[buflen - 1]= '\0';
+
+	buf1 = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (buf1 == NULL)
+		return (-ENOMEM);
+
+	memset(&ent, 0, sizeof(ent));
+
+	/* Authentication name */
+	if (qword_get(&buf, buf1, PAGE_SIZE) <= 0)
+		goto out;
+	memcpy(ent.authname, buf1, sizeof(ent.authname));
+
+	/* Type */
+	if (qword_get(&buf, buf1, PAGE_SIZE) <= 0)
+		goto out;
+	ent.type = strcmp(buf1, "user") == 0 ?
+		IDMAP_TYPE_USER : IDMAP_TYPE_GROUP;
+
+	/* Name */
+	error = qword_get(&buf, buf1, PAGE_SIZE);
+	if (error <= 0 || error >= IDMAP_NAMESZ)
+		goto out;
+	memcpy(ent.name, buf1, sizeof(ent.name));
+
+	/* expiry */
+	ent.h.expiry_time = get_expiry(&buf);
+	if (ent.h.expiry_time == 0)
+		goto out;
+
+	/* ID */
+	error = get_int(&buf, &ent.id);
+	if (error == -EINVAL)
+		goto out;
+	if (error == -ENOENT)
+		set_bit(CACHE_NEGATIVE, &ent.h.flags);
+
+	error = -ENOMEM;
+	if ((res = nametoid_lookup(&ent, 1)) == NULL)
+		goto out;
+
+	ent_put(&res->h, &nametoid_cache);
+	error = 0;
+out:
+	kfree(buf1);
+
+	return (error);
+}
+
+static DefineSimpleCacheLookupMap(ent, nametoid);
+
+/*
+ * Exported API
+ */
+
+void
+nfsd_idmap_init(void)
+{
+	cache_register(&idtoname_cache);
+	cache_register(&nametoid_cache);
+}
+
+void
+nfsd_idmap_shutdown(void)
+{
+	cache_unregister(&idtoname_cache);
+	cache_unregister(&nametoid_cache);
+}
+
+/*
+ * Deferred request handling
+ */
+
+struct idmap_defer_req {
+       struct cache_req		req;
+       struct cache_deferred_req deferred_req;
+       wait_queue_head_t	waitq;
+       atomic_t			count;
+};
+
+static inline void
+put_mdr(struct idmap_defer_req *mdr)
+{
+	if (atomic_dec_and_test(&mdr->count))
+		kfree(mdr);
+}
+
+static inline void
+get_mdr(struct idmap_defer_req *mdr)
+{
+	atomic_inc(&mdr->count);
+}
+
+static void
+idmap_revisit(struct cache_deferred_req *dreq, int toomany)
+{
+	struct idmap_defer_req *mdr =
+		container_of(dreq, struct idmap_defer_req, deferred_req);
+
+	wake_up(&mdr->waitq);
+	put_mdr(mdr);
+}
+
+static struct cache_deferred_req *
+idmap_defer(struct cache_req *req)
+{
+	struct idmap_defer_req *mdr =
+		container_of(req, struct idmap_defer_req, req);
+
+	mdr->deferred_req.revisit = idmap_revisit;
+	get_mdr(mdr);
+	return (&mdr->deferred_req);
+}
+
+static inline int
+do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *, int), struct ent *key,
+		struct cache_detail *detail, struct ent **item,
+		struct idmap_defer_req *mdr)
+{
+	*item = lookup_fn(key, 0);
+	if (!*item)
+		return -ENOMEM;
+	return cache_check(detail, &(*item)->h, &mdr->req);
+}
+
+static inline int
+do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *, int),
+			struct ent *key, struct cache_detail *detail,
+			struct ent **item)
+{
+	int ret = -ENOMEM;
+
+	*item = lookup_fn(key, 0);
+	if (!*item)
+		goto out_err;
+	ret = -ETIMEDOUT;
+	if (!test_bit(CACHE_VALID, &(*item)->h.flags)
+			|| (*item)->h.expiry_time < get_seconds()
+			|| detail->flush_time > (*item)->h.last_refresh)
+		goto out_put;
+	ret = -ENOENT;
+	if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags))
+		goto out_put;
+	return 0;
+out_put:
+	ent_put(&(*item)->h, detail);
+out_err:
+	*item = NULL;
+	return ret;
+}
+
+static int
+idmap_lookup(struct svc_rqst *rqstp,
+		struct ent *(*lookup_fn)(struct ent *, int), struct ent *key,
+		struct cache_detail *detail, struct ent **item)
+{
+	struct idmap_defer_req *mdr;
+	int ret;
+
+	mdr = kmalloc(sizeof(*mdr), GFP_KERNEL);
+	if (!mdr)
+		return -ENOMEM;
+	memset(mdr, 0, sizeof(*mdr));
+	atomic_set(&mdr->count, 1);
+	init_waitqueue_head(&mdr->waitq);
+	mdr->req.defer = idmap_defer;
+	ret = do_idmap_lookup(lookup_fn, key, detail, item, mdr);
+	if (ret == -EAGAIN) {
+		wait_event_interruptible_timeout(mdr->waitq,
+			test_bit(CACHE_VALID, &(*item)->h.flags), 1 * HZ);
+		ret = do_idmap_lookup_nowait(lookup_fn, key, detail, item);
+	}
+	put_mdr(mdr);
+	return ret;
+}
+
+static int
+idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen,
+		uid_t *id)
+{
+	struct ent *item, key = {
+		.type = type,
+	};
+	int ret;
+
+	if (namelen + 1 > sizeof(key.name))
+		return -EINVAL;
+	memcpy(key.name, name, namelen);
+	key.name[namelen] = '\0';
+	strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname));
+	ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item);
+	if (ret == -ENOENT)
+		ret = -ESRCH; /* nfserr_badname */
+	if (ret)
+		return ret;
+	*id = item->id;
+	ent_put(&item->h, &nametoid_cache);
+	return 0;
+}
+
+static int
+idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
+{
+	struct ent *item, key = {
+		.id = id,
+		.type = type,
+	};
+	int ret;
+
+	strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname));
+	ret = idmap_lookup(rqstp, idtoname_lookup, &key, &idtoname_cache, &item);
+	if (ret == -ENOENT)
+		return sprintf(name, "%u", id);
+	if (ret)
+		return ret;
+	ret = strlen(item->name);
+	BUG_ON(ret > IDMAP_NAMESZ);
+	memcpy(name, item->name, ret);
+	ent_put(&item->h, &idtoname_cache);
+	return ret;
+}
+
+int
+nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
+		__u32 *id)
+{
+	return idmap_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id);
+}
+
+int
+nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
+		__u32 *id)
+{
+	return idmap_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id);
+}
+
+int
+nfsd_map_uid_to_name(struct svc_rqst *rqstp, __u32 id, char *name)
+{
+	return idmap_id_to_name(rqstp, IDMAP_TYPE_USER, id, name);
+}
+
+int
+nfsd_map_gid_to_name(struct svc_rqst *rqstp, __u32 id, char *name)
+{
+	return idmap_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name);
+}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
new file mode 100644
index 000000000000..e8158741e8b5
--- /dev/null
+++ b/fs/nfsd/nfs4proc.c
@@ -0,0 +1,984 @@
+/*
+ *  fs/nfsd/nfs4proc.c
+ *
+ *  Server-side procedures for NFSv4.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *  Andy Adamson   <andros@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Note: some routines in this file are just trivial wrappers
+ * (e.g. nfsd4_lookup()) defined solely for the sake of consistent
+ * naming.  Since all such routines have been declared "inline",
+ * there shouldn't be any associated overhead.  At some point in
+ * the future, I might inline these "by hand" to clean up a
+ * little.
+ */
+
+#include <linux/param.h>
+#include <linux/major.h>
+#include <linux/slab.h>
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfs4.h>
+#include <linux/nfsd/state.h>
+#include <linux/nfsd/xdr4.h>
+#include <linux/nfs4_acl.h>
+
+#define NFSDDBG_FACILITY		NFSDDBG_PROC
+
+static inline void
+fh_dup2(struct svc_fh *dst, struct svc_fh *src)
+{
+	fh_put(dst);
+	dget(src->fh_dentry);
+	if (src->fh_export)
+		cache_get(&src->fh_export->h);
+	*dst = *src;
+}
+
+static int
+do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+{
+	int accmode, status;
+
+	if (open->op_truncate &&
+		!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
+		return nfserr_inval;
+
+	accmode = MAY_NOP;
+	if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
+		accmode = MAY_READ;
+	if (open->op_share_deny & NFS4_SHARE_ACCESS_WRITE)
+		accmode |= (MAY_WRITE | MAY_TRUNC);
+	accmode |= MAY_OWNER_OVERRIDE;
+
+	status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
+
+	return status;
+}
+
+static int
+do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+{
+	struct svc_fh resfh;
+	int status;
+
+	fh_init(&resfh, NFS4_FHSIZE);
+	open->op_truncate = 0;
+
+	if (open->op_create) {
+		/*
+		 * Note: create modes (UNCHECKED,GUARDED...) are the same
+		 * in NFSv4 as in v3.
+		 */
+		status = nfsd_create_v3(rqstp, current_fh, open->op_fname.data,
+					open->op_fname.len, &open->op_iattr,
+					&resfh, open->op_createmode,
+					(u32 *)open->op_verf.data, &open->op_truncate);
+	}
+	else {
+		status = nfsd_lookup(rqstp, current_fh,
+				     open->op_fname.data, open->op_fname.len, &resfh);
+		fh_unlock(current_fh);
+	}
+
+	if (!status) {
+		set_change_info(&open->op_cinfo, current_fh);
+
+		/* set reply cache */
+		fh_dup2(current_fh, &resfh);
+		open->op_stateowner->so_replay.rp_openfh_len =
+			resfh.fh_handle.fh_size;
+		memcpy(open->op_stateowner->so_replay.rp_openfh,
+				&resfh.fh_handle.fh_base,
+				resfh.fh_handle.fh_size);
+
+		status = do_open_permission(rqstp, current_fh, open);
+	}
+
+	fh_put(&resfh);
+	return status;
+}
+
+static int
+do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+{
+	int status;
+
+	/* Only reclaims from previously confirmed clients are valid */
+	if ((status = nfs4_check_open_reclaim(&open->op_clientid)))
+		return status;
+
+	/* We don't know the target directory, and therefore can not
+	* set the change info
+	*/
+
+	memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info));
+
+	/* set replay cache */
+	open->op_stateowner->so_replay.rp_openfh_len = current_fh->fh_handle.fh_size;
+	memcpy(open->op_stateowner->so_replay.rp_openfh,
+		&current_fh->fh_handle.fh_base,
+		current_fh->fh_handle.fh_size);
+
+	open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) &&
+		(open->op_iattr.ia_size == 0);
+
+	status = do_open_permission(rqstp, current_fh, open);
+
+	return status;
+}
+
+
+static inline int
+nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+{
+	int status;
+	dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n",
+		(int)open->op_fname.len, open->op_fname.data,
+		open->op_stateowner);
+
+	if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
+		return nfserr_grace;
+
+	if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
+		return nfserr_no_grace;
+
+	/* This check required by spec. */
+	if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL)
+		return nfserr_inval;
+
+	nfs4_lock_state();
+
+	/* check seqid for replay. set nfs4_owner */
+	status = nfsd4_process_open1(open);
+	if (status == NFSERR_REPLAY_ME) {
+		struct nfs4_replay *rp = &open->op_stateowner->so_replay;
+		fh_put(current_fh);
+		current_fh->fh_handle.fh_size = rp->rp_openfh_len;
+		memcpy(&current_fh->fh_handle.fh_base, rp->rp_openfh,
+				rp->rp_openfh_len);
+		status = fh_verify(rqstp, current_fh, 0, MAY_NOP);
+		if (status)
+			dprintk("nfsd4_open: replay failed"
+				" restoring previous filehandle\n");
+		else
+			status = NFSERR_REPLAY_ME;
+	}
+	if (status)
+		goto out;
+	switch (open->op_claim_type) {
+		case NFS4_OPEN_CLAIM_NULL:
+			/*
+			 * (1) set CURRENT_FH to the file being opened,
+			 * creating it if necessary, (2) set open->op_cinfo,
+			 * (3) set open->op_truncate if the file is to be
+			 * truncated after opening, (4) do permission checking.
+			 */
+			status = do_open_lookup(rqstp, current_fh, open);
+			if (status)
+				goto out;
+			break;
+		case NFS4_OPEN_CLAIM_PREVIOUS:
+			/*
+			 * The CURRENT_FH is already set to the file being
+			 * opened.  (1) set open->op_cinfo, (2) set
+			 * open->op_truncate if the file is to be truncated
+			 * after opening, (3) do permission checking.
+			*/
+			status = do_open_fhandle(rqstp, current_fh, open);
+			if (status)
+				goto out;
+			break;
+		case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+             	case NFS4_OPEN_CLAIM_DELEGATE_PREV:
+			printk("NFSD: unsupported OPEN claim type %d\n",
+				open->op_claim_type);
+			status = nfserr_notsupp;
+			goto out;
+		default:
+			printk("NFSD: Invalid OPEN claim type %d\n",
+				open->op_claim_type);
+			status = nfserr_inval;
+			goto out;
+	}
+	/*
+	 * nfsd4_process_open2() does the actual opening of the file.  If
+	 * successful, it (1) truncates the file if open->op_truncate was
+	 * set, (2) sets open->op_stateid, (3) sets open->op_delegation.
+	 */
+	status = nfsd4_process_open2(rqstp, current_fh, open);
+out:
+	if (open->op_stateowner)
+		nfs4_get_stateowner(open->op_stateowner);
+	nfs4_unlock_state();
+	return status;
+}
+
+/*
+ * filehandle-manipulating ops.
+ */
+static inline int
+nfsd4_getfh(struct svc_fh *current_fh, struct svc_fh **getfh)
+{
+	if (!current_fh->fh_dentry)
+		return nfserr_nofilehandle;
+
+	*getfh = current_fh;
+	return nfs_ok;
+}
+
+static inline int
+nfsd4_putfh(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_putfh *putfh)
+{
+	fh_put(current_fh);
+	current_fh->fh_handle.fh_size = putfh->pf_fhlen;
+	memcpy(&current_fh->fh_handle.fh_base, putfh->pf_fhval, putfh->pf_fhlen);
+	return fh_verify(rqstp, current_fh, 0, MAY_NOP);
+}
+
+static inline int
+nfsd4_putrootfh(struct svc_rqst *rqstp, struct svc_fh *current_fh)
+{
+	int status;
+
+	fh_put(current_fh);
+	status = exp_pseudoroot(rqstp->rq_client, current_fh,
+			      &rqstp->rq_chandle);
+	if (!status)
+		status = nfserrno(nfsd_setuser(rqstp, current_fh->fh_export));
+	return status;
+}
+
+static inline int
+nfsd4_restorefh(struct svc_fh *current_fh, struct svc_fh *save_fh)
+{
+	if (!save_fh->fh_dentry)
+		return nfserr_restorefh;
+
+	fh_dup2(current_fh, save_fh);
+	return nfs_ok;
+}
+
+static inline int
+nfsd4_savefh(struct svc_fh *current_fh, struct svc_fh *save_fh)
+{
+	if (!current_fh->fh_dentry)
+		return nfserr_nofilehandle;
+
+	fh_dup2(save_fh, current_fh);
+	return nfs_ok;
+}
+
+/*
+ * misc nfsv4 ops
+ */
+static inline int
+nfsd4_access(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_access *access)
+{
+	if (access->ac_req_access & ~NFS3_ACCESS_FULL)
+		return nfserr_inval;
+
+	access->ac_resp_access = access->ac_req_access;
+	return nfsd_access(rqstp, current_fh, &access->ac_resp_access, &access->ac_supported);
+}
+
+static inline int
+nfsd4_commit(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_commit *commit)
+{
+	int status;
+
+	u32 *p = (u32 *)commit->co_verf.data;
+	*p++ = nfssvc_boot.tv_sec;
+	*p++ = nfssvc_boot.tv_usec;
+
+	status = nfsd_commit(rqstp, current_fh, commit->co_offset, commit->co_count);
+	if (status == nfserr_symlink)
+		status = nfserr_inval;
+	return status;
+}
+
+static int
+nfsd4_create(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_create *create)
+{
+	struct svc_fh resfh;
+	int status;
+	dev_t rdev;
+
+	fh_init(&resfh, NFS4_FHSIZE);
+
+	status = fh_verify(rqstp, current_fh, S_IFDIR, MAY_CREATE);
+	if (status == nfserr_symlink)
+		status = nfserr_notdir;
+	if (status)
+		return status;
+
+	switch (create->cr_type) {
+	case NF4LNK:
+		/* ugh! we have to null-terminate the linktext, or
+		 * vfs_symlink() will choke.  it is always safe to
+		 * null-terminate by brute force, since at worst we
+		 * will overwrite the first byte of the create namelen
+		 * in the XDR buffer, which has already been extracted
+		 * during XDR decode.
+		 */
+		create->cr_linkname[create->cr_linklen] = 0;
+
+		status = nfsd_symlink(rqstp, current_fh, create->cr_name,
+				      create->cr_namelen, create->cr_linkname,
+				      create->cr_linklen, &resfh, &create->cr_iattr);
+		break;
+
+	case NF4BLK:
+		rdev = MKDEV(create->cr_specdata1, create->cr_specdata2);
+		if (MAJOR(rdev) != create->cr_specdata1 ||
+		    MINOR(rdev) != create->cr_specdata2)
+			return nfserr_inval;
+		status = nfsd_create(rqstp, current_fh, create->cr_name,
+				     create->cr_namelen, &create->cr_iattr,
+				     S_IFBLK, rdev, &resfh);
+		break;
+
+	case NF4CHR:
+		rdev = MKDEV(create->cr_specdata1, create->cr_specdata2);
+		if (MAJOR(rdev) != create->cr_specdata1 ||
+		    MINOR(rdev) != create->cr_specdata2)
+			return nfserr_inval;
+		status = nfsd_create(rqstp, current_fh, create->cr_name,
+				     create->cr_namelen, &create->cr_iattr,
+				     S_IFCHR, rdev, &resfh);
+		break;
+
+	case NF4SOCK:
+		status = nfsd_create(rqstp, current_fh, create->cr_name,
+				     create->cr_namelen, &create->cr_iattr,
+				     S_IFSOCK, 0, &resfh);
+		break;
+
+	case NF4FIFO:
+		status = nfsd_create(rqstp, current_fh, create->cr_name,
+				     create->cr_namelen, &create->cr_iattr,
+				     S_IFIFO, 0, &resfh);
+		break;
+
+	case NF4DIR:
+		create->cr_iattr.ia_valid &= ~ATTR_SIZE;
+		status = nfsd_create(rqstp, current_fh, create->cr_name,
+				     create->cr_namelen, &create->cr_iattr,
+				     S_IFDIR, 0, &resfh);
+		break;
+
+	default:
+		status = nfserr_badtype;
+	}
+
+	if (!status) {
+		fh_unlock(current_fh);
+		set_change_info(&create->cr_cinfo, current_fh);
+		fh_dup2(current_fh, &resfh);
+	}
+
+	fh_put(&resfh);
+	return status;
+}
+
+static inline int
+nfsd4_getattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_getattr *getattr)
+{
+	int status;
+
+	status = fh_verify(rqstp, current_fh, 0, MAY_NOP);
+	if (status)
+		return status;
+
+	if (getattr->ga_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)
+		return nfserr_inval;
+
+	getattr->ga_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0;
+	getattr->ga_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1;
+
+	getattr->ga_fhp = current_fh;
+	return nfs_ok;
+}
+
+static inline int
+nfsd4_link(struct svc_rqst *rqstp, struct svc_fh *current_fh,
+	   struct svc_fh *save_fh, struct nfsd4_link *link)
+{
+	int status = nfserr_nofilehandle;
+
+	if (!save_fh->fh_dentry)
+		return status;
+	status = nfsd_link(rqstp, current_fh, link->li_name, link->li_namelen, save_fh);
+	if (!status)
+		set_change_info(&link->li_cinfo, current_fh);
+	return status;
+}
+
+static int
+nfsd4_lookupp(struct svc_rqst *rqstp, struct svc_fh *current_fh)
+{
+	struct svc_fh tmp_fh;
+	int ret;
+
+	fh_init(&tmp_fh, NFS4_FHSIZE);
+	if((ret = exp_pseudoroot(rqstp->rq_client, &tmp_fh,
+			      &rqstp->rq_chandle)) != 0)
+		return ret;
+	if (tmp_fh.fh_dentry == current_fh->fh_dentry) {
+		fh_put(&tmp_fh);
+		return nfserr_noent;
+	}
+	fh_put(&tmp_fh);
+	return nfsd_lookup(rqstp, current_fh, "..", 2, current_fh);
+}
+
+static inline int
+nfsd4_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lookup *lookup)
+{
+	return nfsd_lookup(rqstp, current_fh, lookup->lo_name, lookup->lo_len, current_fh);
+}
+
+static inline int
+nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read)
+{
+	int status;
+	struct file *filp = NULL;
+
+	/* no need to check permission - this will be done in nfsd_read() */
+
+	if (read->rd_offset >= OFFSET_MAX)
+		return nfserr_inval;
+
+	nfs4_lock_state();
+	/* check stateid */
+	if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid,
+					CHECK_FH | RD_STATE, &filp))) {
+		dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
+		goto out;
+	}
+	status = nfs_ok;
+out:
+	nfs4_unlock_state();
+	read->rd_rqstp = rqstp;
+	read->rd_fhp = current_fh;
+	read->rd_filp = filp;
+	return status;
+}
+
+static inline int
+nfsd4_readdir(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_readdir *readdir)
+{
+	u64 cookie = readdir->rd_cookie;
+	static const nfs4_verifier zeroverf;
+
+	/* no need to check permission - this will be done in nfsd_readdir() */
+
+	if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)
+		return nfserr_inval;
+
+	readdir->rd_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0;
+	readdir->rd_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1;
+
+	if ((cookie > ~(u32)0) || (cookie == 1) || (cookie == 2) ||
+	    (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE)))
+		return nfserr_bad_cookie;
+
+	readdir->rd_rqstp = rqstp;
+	readdir->rd_fhp = current_fh;
+	return nfs_ok;
+}
+
+static inline int
+nfsd4_readlink(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_readlink *readlink)
+{
+	readlink->rl_rqstp = rqstp;
+	readlink->rl_fhp = current_fh;
+	return nfs_ok;
+}
+
+static inline int
+nfsd4_remove(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_remove *remove)
+{
+	int status;
+
+	status = nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen);
+	if (status == nfserr_symlink)
+		return nfserr_notdir;
+	if (!status) {
+		fh_unlock(current_fh);
+		set_change_info(&remove->rm_cinfo, current_fh);
+	}
+	return status;
+}
+
+static inline int
+nfsd4_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh,
+	     struct svc_fh *save_fh, struct nfsd4_rename *rename)
+{
+	int status = nfserr_nofilehandle;
+
+	if (!save_fh->fh_dentry)
+		return status;
+	status = nfsd_rename(rqstp, save_fh, rename->rn_sname,
+			     rename->rn_snamelen, current_fh,
+			     rename->rn_tname, rename->rn_tnamelen);
+
+	/* the underlying filesystem returns different error's than required
+	 * by NFSv4. both save_fh and current_fh have been verified.. */
+	if (status == nfserr_isdir)
+		status = nfserr_exist;
+	else if ((status == nfserr_notdir) &&
+                  (S_ISDIR(save_fh->fh_dentry->d_inode->i_mode) &&
+                   S_ISDIR(current_fh->fh_dentry->d_inode->i_mode)))
+		status = nfserr_exist;
+	else if (status == nfserr_symlink)
+		status = nfserr_notdir;
+
+	if (!status) {
+		set_change_info(&rename->rn_sinfo, current_fh);
+		set_change_info(&rename->rn_tinfo, save_fh);
+	}
+	return status;
+}
+
+static inline int
+nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_setattr *setattr)
+{
+	int status = nfs_ok;
+
+	if (!current_fh->fh_dentry)
+		return nfserr_nofilehandle;
+
+	status = nfs_ok;
+	if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
+		nfs4_lock_state();
+		if ((status = nfs4_preprocess_stateid_op(current_fh,
+						&setattr->sa_stateid,
+						CHECK_FH | WR_STATE, NULL))) {
+			dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
+			goto out_unlock;
+		}
+		nfs4_unlock_state();
+	}
+	status = nfs_ok;
+	if (setattr->sa_acl != NULL)
+		status = nfsd4_set_nfs4_acl(rqstp, current_fh, setattr->sa_acl);
+	if (status)
+		goto out;
+	status = nfsd_setattr(rqstp, current_fh, &setattr->sa_iattr,
+				0, (time_t)0);
+out:
+	return status;
+out_unlock:
+	nfs4_unlock_state();
+	return status;
+}
+
+static inline int
+nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_write *write)
+{
+	stateid_t *stateid = &write->wr_stateid;
+	struct file *filp = NULL;
+	u32 *p;
+	int status = nfs_ok;
+
+	/* no need to check permission - this will be done in nfsd_write() */
+
+	if (write->wr_offset >= OFFSET_MAX)
+		return nfserr_inval;
+
+	nfs4_lock_state();
+	if ((status = nfs4_preprocess_stateid_op(current_fh, stateid,
+					CHECK_FH | WR_STATE, &filp))) {
+		dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
+		goto out;
+	}
+	nfs4_unlock_state();
+
+	write->wr_bytes_written = write->wr_buflen;
+	write->wr_how_written = write->wr_stable_how;
+	p = (u32 *)write->wr_verifier.data;
+	*p++ = nfssvc_boot.tv_sec;
+	*p++ = nfssvc_boot.tv_usec;
+
+	status =  nfsd_write(rqstp, current_fh, filp, write->wr_offset,
+			write->wr_vec, write->wr_vlen, write->wr_buflen,
+			&write->wr_how_written);
+
+	if (status == nfserr_symlink)
+		status = nfserr_inval;
+	return status;
+out:
+	nfs4_unlock_state();
+	return status;
+}
+
+/* This routine never returns NFS_OK!  If there are no other errors, it
+ * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the
+ * attributes matched.  VERIFY is implemented by mapping NFSERR_SAME
+ * to NFS_OK after the call; NVERIFY by mapping NFSERR_NOT_SAME to NFS_OK.
+ */
+static int
+nfsd4_verify(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_verify *verify)
+{
+	u32 *buf, *p;
+	int count;
+	int status;
+
+	status = fh_verify(rqstp, current_fh, 0, MAY_NOP);
+	if (status)
+		return status;
+
+	if ((verify->ve_bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0)
+	    || (verify->ve_bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1))
+		return nfserr_attrnotsupp;
+	if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)
+	    || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1))
+		return nfserr_inval;
+	if (verify->ve_attrlen & 3)
+		return nfserr_inval;
+
+	/* count in words:
+	 *   bitmap_len(1) + bitmap(2) + attr_len(1) = 4
+	 */
+	count = 4 + (verify->ve_attrlen >> 2);
+	buf = kmalloc(count << 2, GFP_KERNEL);
+	if (!buf)
+		return nfserr_resource;
+
+	status = nfsd4_encode_fattr(current_fh, current_fh->fh_export,
+				    current_fh->fh_dentry, buf,
+				    &count, verify->ve_bmval,
+				    rqstp);
+
+	/* this means that nfsd4_encode_fattr() ran out of space */
+	if (status == nfserr_resource && count == 0)
+		status = nfserr_not_same;
+	if (status)
+		goto out_kfree;
+
+	p = buf + 3;
+	status = nfserr_not_same;
+	if (ntohl(*p++) != verify->ve_attrlen)
+		goto out_kfree;
+	if (!memcmp(p, verify->ve_attrval, verify->ve_attrlen))
+		status = nfserr_same;
+
+out_kfree:
+	kfree(buf);
+	return status;
+}
+
+/*
+ * NULL call.
+ */
+static int
+nfsd4_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+	return nfs_ok;
+}
+
+
+/*
+ * COMPOUND call.
+ */
+static int
+nfsd4_proc_compound(struct svc_rqst *rqstp,
+		    struct nfsd4_compoundargs *args,
+		    struct nfsd4_compoundres *resp)
+{
+	struct nfsd4_op	*op;
+	struct svc_fh	*current_fh = NULL;
+	struct svc_fh	*save_fh = NULL;
+	struct nfs4_stateowner *replay_owner = NULL;
+	int		slack_space;    /* in words, not bytes! */
+	int		status;
+
+	status = nfserr_resource;
+	current_fh = kmalloc(sizeof(*current_fh), GFP_KERNEL);
+	if (current_fh == NULL)
+		goto out;
+	fh_init(current_fh, NFS4_FHSIZE);
+	save_fh = kmalloc(sizeof(*save_fh), GFP_KERNEL);
+	if (save_fh == NULL)
+		goto out;
+	fh_init(save_fh, NFS4_FHSIZE);
+
+	resp->xbuf = &rqstp->rq_res;
+	resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len;
+	resp->tagp = resp->p;
+	/* reserve space for: taglen, tag, and opcnt */
+	resp->p += 2 + XDR_QUADLEN(args->taglen);
+	resp->end = rqstp->rq_res.head[0].iov_base + PAGE_SIZE;
+	resp->taglen = args->taglen;
+	resp->tag = args->tag;
+	resp->opcnt = 0;
+	resp->rqstp = rqstp;
+
+	/*
+	 * According to RFC3010, this takes precedence over all other errors.
+	 */
+	status = nfserr_minor_vers_mismatch;
+	if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION)
+		goto out;
+
+	status = nfs_ok;
+	while (!status && resp->opcnt < args->opcnt) {
+		op = &args->ops[resp->opcnt++];
+
+		/*
+		 * The XDR decode routines may have pre-set op->status;
+		 * for example, if there is a miscellaneous XDR error
+		 * it will be set to nfserr_bad_xdr.
+		 */
+		if (op->status)
+			goto encode_op;
+
+		/* We must be able to encode a successful response to
+		 * this operation, with enough room left over to encode a
+		 * failed response to the next operation.  If we don't
+		 * have enough room, fail with ERR_RESOURCE.
+		 */
+/* FIXME - is slack_space *really* words, or bytes??? - neilb */
+		slack_space = (char *)resp->end - (char *)resp->p;
+		if (slack_space < COMPOUND_SLACK_SPACE + COMPOUND_ERR_SLACK_SPACE) {
+			BUG_ON(slack_space < COMPOUND_ERR_SLACK_SPACE);
+			op->status = nfserr_resource;
+			goto encode_op;
+		}
+
+		/* All operations except RENEW, SETCLIENTID, RESTOREFH
+		* SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH
+		* require a valid current filehandle
+		*
+		* SETATTR NOFILEHANDLE error handled in nfsd4_setattr
+		* due to required returned bitmap argument
+		*/
+		if ((!current_fh->fh_dentry) &&
+		   !((op->opnum == OP_PUTFH) || (op->opnum == OP_PUTROOTFH) ||
+		   (op->opnum == OP_SETCLIENTID) ||
+		   (op->opnum == OP_SETCLIENTID_CONFIRM) ||
+		   (op->opnum == OP_RENEW) || (op->opnum == OP_RESTOREFH) ||
+		   (op->opnum == OP_RELEASE_LOCKOWNER) ||
+		   (op->opnum == OP_SETATTR))) {
+			op->status = nfserr_nofilehandle;
+			goto encode_op;
+		}
+		switch (op->opnum) {
+		case OP_ACCESS:
+			op->status = nfsd4_access(rqstp, current_fh, &op->u.access);
+			break;
+		case OP_CLOSE:
+			op->status = nfsd4_close(rqstp, current_fh, &op->u.close);
+			replay_owner = op->u.close.cl_stateowner;
+			break;
+		case OP_COMMIT:
+			op->status = nfsd4_commit(rqstp, current_fh, &op->u.commit);
+			break;
+		case OP_CREATE:
+			op->status = nfsd4_create(rqstp, current_fh, &op->u.create);
+			break;
+		case OP_DELEGRETURN:
+			op->status = nfsd4_delegreturn(rqstp, current_fh, &op->u.delegreturn);
+			break;
+		case OP_GETATTR:
+			op->status = nfsd4_getattr(rqstp, current_fh, &op->u.getattr);
+			break;
+		case OP_GETFH:
+			op->status = nfsd4_getfh(current_fh, &op->u.getfh);
+			break;
+		case OP_LINK:
+			op->status = nfsd4_link(rqstp, current_fh, save_fh, &op->u.link);
+			break;
+		case OP_LOCK:
+			op->status = nfsd4_lock(rqstp, current_fh, &op->u.lock);
+			replay_owner = op->u.lock.lk_stateowner;
+			break;
+		case OP_LOCKT:
+			op->status = nfsd4_lockt(rqstp, current_fh, &op->u.lockt);
+			break;
+		case OP_LOCKU:
+			op->status = nfsd4_locku(rqstp, current_fh, &op->u.locku);
+			replay_owner = op->u.locku.lu_stateowner;
+			break;
+		case OP_LOOKUP:
+			op->status = nfsd4_lookup(rqstp, current_fh, &op->u.lookup);
+			break;
+		case OP_LOOKUPP:
+			op->status = nfsd4_lookupp(rqstp, current_fh);
+			break;
+		case OP_NVERIFY:
+			op->status = nfsd4_verify(rqstp, current_fh, &op->u.nverify);
+			if (op->status == nfserr_not_same)
+				op->status = nfs_ok;
+			break;
+		case OP_OPEN:
+			op->status = nfsd4_open(rqstp, current_fh, &op->u.open);
+			replay_owner = op->u.open.op_stateowner;
+			break;
+		case OP_OPEN_CONFIRM:
+			op->status = nfsd4_open_confirm(rqstp, current_fh, &op->u.open_confirm);
+			replay_owner = op->u.open_confirm.oc_stateowner;
+			break;
+		case OP_OPEN_DOWNGRADE:
+			op->status = nfsd4_open_downgrade(rqstp, current_fh, &op->u.open_downgrade);
+			replay_owner = op->u.open_downgrade.od_stateowner;
+			break;
+		case OP_PUTFH:
+			op->status = nfsd4_putfh(rqstp, current_fh, &op->u.putfh);
+			break;
+		case OP_PUTROOTFH:
+			op->status = nfsd4_putrootfh(rqstp, current_fh);
+			break;
+		case OP_READ:
+			op->status = nfsd4_read(rqstp, current_fh, &op->u.read);
+			break;
+		case OP_READDIR:
+			op->status = nfsd4_readdir(rqstp, current_fh, &op->u.readdir);
+			break;
+		case OP_READLINK:
+			op->status = nfsd4_readlink(rqstp, current_fh, &op->u.readlink);
+			break;
+		case OP_REMOVE:
+			op->status = nfsd4_remove(rqstp, current_fh, &op->u.remove);
+			break;
+		case OP_RENAME:
+			op->status = nfsd4_rename(rqstp, current_fh, save_fh, &op->u.rename);
+			break;
+		case OP_RENEW:
+			op->status = nfsd4_renew(&op->u.renew);
+			break;
+		case OP_RESTOREFH:
+			op->status = nfsd4_restorefh(current_fh, save_fh);
+			break;
+		case OP_SAVEFH:
+			op->status = nfsd4_savefh(current_fh, save_fh);
+			break;
+		case OP_SETATTR:
+			op->status = nfsd4_setattr(rqstp, current_fh, &op->u.setattr);
+			break;
+		case OP_SETCLIENTID:
+			op->status = nfsd4_setclientid(rqstp, &op->u.setclientid);
+			break;
+		case OP_SETCLIENTID_CONFIRM:
+			op->status = nfsd4_setclientid_confirm(rqstp, &op->u.setclientid_confirm);
+			break;
+		case OP_VERIFY:
+			op->status = nfsd4_verify(rqstp, current_fh, &op->u.verify);
+			if (op->status == nfserr_same)
+				op->status = nfs_ok;
+			break;
+		case OP_WRITE:
+			op->status = nfsd4_write(rqstp, current_fh, &op->u.write);
+			break;
+		case OP_RELEASE_LOCKOWNER:
+			op->status = nfsd4_release_lockowner(rqstp, &op->u.release_lockowner);
+			break;
+		default:
+			BUG_ON(op->status == nfs_ok);
+			break;
+		}
+
+encode_op:
+		if (op->status == NFSERR_REPLAY_ME) {
+			op->replay = &replay_owner->so_replay;
+			nfsd4_encode_replay(resp, op);
+			status = op->status = op->replay->rp_status;
+		} else {
+			nfsd4_encode_operation(resp, op);
+			status = op->status;
+		}
+		if (replay_owner && (replay_owner != (void *)(-1))) {
+			nfs4_put_stateowner(replay_owner);
+			replay_owner = NULL;
+		}
+	}
+
+out:
+	nfsd4_release_compoundargs(args);
+	if (current_fh)
+		fh_put(current_fh);
+	kfree(current_fh);
+	if (save_fh)
+		fh_put(save_fh);
+	kfree(save_fh);
+	return status;
+}
+
+#define nfs4svc_decode_voidargs		NULL
+#define nfs4svc_release_void		NULL
+#define nfsd4_voidres			nfsd4_voidargs
+#define nfs4svc_release_compound	NULL
+struct nfsd4_voidargs { int dummy; };
+
+#define PROC(name, argt, rest, relt, cache, respsize)	\
+ { (svc_procfunc) nfsd4_proc_##name,		\
+   (kxdrproc_t) nfs4svc_decode_##argt##args,	\
+   (kxdrproc_t) nfs4svc_encode_##rest##res,	\
+   (kxdrproc_t) nfs4svc_release_##relt,		\
+   sizeof(struct nfsd4_##argt##args),		\
+   sizeof(struct nfsd4_##rest##res),		\
+   0,						\
+   cache,					\
+   respsize,					\
+ }
+
+/*
+ * TODO: At the present time, the NFSv4 server does not do XID caching
+ * of requests.  Implementing XID caching would not be a serious problem,
+ * although it would require a mild change in interfaces since one
+ * doesn't know whether an NFSv4 request is idempotent until after the
+ * XDR decode.  However, XID caching totally confuses pynfs (Peter
+ * Astrand's regression testsuite for NFSv4 servers), which reuses
+ * XID's liberally, so I've left it unimplemented until pynfs generates
+ * better XID's.
+ */
+static struct svc_procedure		nfsd_procedures4[2] = {
+  PROC(null,	 void,		void,		void,	  RC_NOCACHE, 1),
+  PROC(compound, compound,	compound,	compound, RC_NOCACHE, NFSD_BUFSIZE)
+};
+
+struct svc_version	nfsd_version4 = {
+		.vs_vers	= 4,
+		.vs_nproc	= 2,
+		.vs_proc	= nfsd_procedures4,
+		.vs_dispatch	= nfsd_dispatch,
+		.vs_xdrsize	= NFS4_SVC_XDRSIZE,
+};
+
+/*
+ * Local variables:
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
new file mode 100644
index 000000000000..579f7fea7968
--- /dev/null
+++ b/fs/nfsd/nfs4state.c
@@ -0,0 +1,3320 @@
+/*
+*  linux/fs/nfsd/nfs4state.c
+*
+*  Copyright (c) 2001 The Regents of the University of Michigan.
+*  All rights reserved.
+*
+*  Kendrick Smith <kmsmith@umich.edu>
+*  Andy Adamson <kandros@umich.edu>
+*
+*  Redistribution and use in source and binary forms, with or without
+*  modification, are permitted provided that the following conditions
+*  are met:
+*
+*  1. Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*  2. Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in the
+*     documentation and/or other materials provided with the distribution.
+*  3. Neither the name of the University nor the names of its
+*     contributors may be used to endorse or promote products derived
+*     from this software without specific prior written permission.
+*
+*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+*  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+*  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+*  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+*  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#include <linux/param.h>
+#include <linux/major.h>
+#include <linux/slab.h>
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/mount.h>
+#include <linux/workqueue.h>
+#include <linux/smp_lock.h>
+#include <linux/kthread.h>
+#include <linux/nfs4.h>
+#include <linux/nfsd/state.h>
+#include <linux/nfsd/xdr4.h>
+
+#define NFSDDBG_FACILITY                NFSDDBG_PROC
+
+/* Globals */
+static time_t lease_time = 90;     /* default lease time */
+static time_t old_lease_time = 90; /* past incarnation lease time */
+static u32 nfs4_reclaim_init = 0;
+time_t boot_time;
+static time_t grace_end = 0;
+static u32 current_clientid = 1;
+static u32 current_ownerid = 1;
+static u32 current_fileid = 1;
+static u32 current_delegid = 1;
+static u32 nfs4_init;
+stateid_t zerostateid;             /* bits all 0 */
+stateid_t onestateid;              /* bits all 1 */
+
+/* debug counters */
+u32 list_add_perfile = 0; 
+u32 list_del_perfile = 0;
+u32 add_perclient = 0;
+u32 del_perclient = 0;
+u32 alloc_file = 0;
+u32 free_file = 0;
+u32 vfsopen = 0;
+u32 vfsclose = 0;
+u32 alloc_delegation= 0;
+u32 free_delegation= 0;
+
+/* forward declarations */
+struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
+static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
+static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
+
+/* Locking:
+ *
+ * client_sema: 
+ * 	protects clientid_hashtbl[], clientstr_hashtbl[],
+ * 	unconfstr_hashtbl[], uncofid_hashtbl[].
+ */
+static DECLARE_MUTEX(client_sema);
+
+void
+nfs4_lock_state(void)
+{
+	down(&client_sema);
+}
+
+void
+nfs4_unlock_state(void)
+{
+	up(&client_sema);
+}
+
+static inline u32
+opaque_hashval(const void *ptr, int nbytes)
+{
+	unsigned char *cptr = (unsigned char *) ptr;
+
+	u32 x = 0;
+	while (nbytes--) {
+		x *= 37;
+		x += *cptr++;
+	}
+	return x;
+}
+
+/* forward declarations */
+static void release_stateowner(struct nfs4_stateowner *sop);
+static void release_stateid(struct nfs4_stateid *stp, int flags);
+static void release_file(struct nfs4_file *fp);
+
+/*
+ * Delegation state
+ */
+
+/* recall_lock protects the del_recall_lru */
+spinlock_t recall_lock;
+static struct list_head del_recall_lru;
+
+static struct nfs4_delegation *
+alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
+{
+	struct nfs4_delegation *dp;
+	struct nfs4_file *fp = stp->st_file;
+	struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback;
+
+	dprintk("NFSD alloc_init_deleg\n");
+	if ((dp = kmalloc(sizeof(struct nfs4_delegation),
+		GFP_KERNEL)) == NULL)
+		return dp;
+	INIT_LIST_HEAD(&dp->dl_del_perfile);
+	INIT_LIST_HEAD(&dp->dl_del_perclnt);
+	INIT_LIST_HEAD(&dp->dl_recall_lru);
+	dp->dl_client = clp;
+	dp->dl_file = fp;
+	dp->dl_flock = NULL;
+	get_file(stp->st_vfs_file);
+	dp->dl_vfs_file = stp->st_vfs_file;
+	dp->dl_type = type;
+	dp->dl_recall.cbr_dp = NULL;
+	dp->dl_recall.cbr_ident = cb->cb_ident;
+	dp->dl_recall.cbr_trunc = 0;
+	dp->dl_stateid.si_boot = boot_time;
+	dp->dl_stateid.si_stateownerid = current_delegid++;
+	dp->dl_stateid.si_fileid = 0;
+	dp->dl_stateid.si_generation = 0;
+	dp->dl_fhlen = current_fh->fh_handle.fh_size;
+	memcpy(dp->dl_fhval, &current_fh->fh_handle.fh_base,
+		        current_fh->fh_handle.fh_size);
+	dp->dl_time = 0;
+	atomic_set(&dp->dl_count, 1);
+	list_add(&dp->dl_del_perfile, &fp->fi_del_perfile);
+	list_add(&dp->dl_del_perclnt, &clp->cl_del_perclnt);
+	alloc_delegation++;
+	return dp;
+}
+
+void
+nfs4_put_delegation(struct nfs4_delegation *dp)
+{
+	if (atomic_dec_and_test(&dp->dl_count)) {
+		dprintk("NFSD: freeing dp %p\n",dp);
+		kfree(dp);
+		free_delegation++;
+	}
+}
+
+/* Remove the associated file_lock first, then remove the delegation.
+ * lease_modify() is called to remove the FS_LEASE file_lock from
+ * the i_flock list, eventually calling nfsd's lock_manager
+ * fl_release_callback.
+ */
+static void
+nfs4_close_delegation(struct nfs4_delegation *dp)
+{
+	struct file *filp = dp->dl_vfs_file;
+
+	dprintk("NFSD: close_delegation dp %p\n",dp);
+	dp->dl_vfs_file = NULL;
+	/* The following nfsd_close may not actually close the file,
+	 * but we want to remove the lease in any case. */
+	setlease(filp, F_UNLCK, &dp->dl_flock);
+	nfsd_close(filp);
+	vfsclose++;
+}
+
+/* Called under the state lock. */
+static void
+unhash_delegation(struct nfs4_delegation *dp)
+{
+	list_del_init(&dp->dl_del_perfile);
+	list_del_init(&dp->dl_del_perclnt);
+	spin_lock(&recall_lock);
+	list_del_init(&dp->dl_recall_lru);
+	spin_unlock(&recall_lock);
+	nfs4_close_delegation(dp);
+	nfs4_put_delegation(dp);
+}
+
+/* 
+ * SETCLIENTID state 
+ */
+
+/* Hash tables for nfs4_clientid state */
+#define CLIENT_HASH_BITS                 4
+#define CLIENT_HASH_SIZE                (1 << CLIENT_HASH_BITS)
+#define CLIENT_HASH_MASK                (CLIENT_HASH_SIZE - 1)
+
+#define clientid_hashval(id) \
+	((id) & CLIENT_HASH_MASK)
+#define clientstr_hashval(name, namelen) \
+	(opaque_hashval((name), (namelen)) & CLIENT_HASH_MASK)
+/*
+ * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
+ * used in reboot/reset lease grace period processing
+ *
+ * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed
+ * setclientid_confirmed info. 
+ *
+ * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed 
+ * setclientid info.
+ *
+ * client_lru holds client queue ordered by nfs4_client.cl_time
+ * for lease renewal.
+ *
+ * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time
+ * for last close replay.
+ */
+static struct list_head	reclaim_str_hashtbl[CLIENT_HASH_SIZE];
+static int reclaim_str_hashtbl_size = 0;
+static struct list_head	conf_id_hashtbl[CLIENT_HASH_SIZE];
+static struct list_head	conf_str_hashtbl[CLIENT_HASH_SIZE];
+static struct list_head	unconf_str_hashtbl[CLIENT_HASH_SIZE];
+static struct list_head	unconf_id_hashtbl[CLIENT_HASH_SIZE];
+static struct list_head client_lru;
+static struct list_head close_lru;
+
+static inline void
+renew_client(struct nfs4_client *clp)
+{
+	/*
+	* Move client to the end to the LRU list.
+	*/
+	dprintk("renewing client (clientid %08x/%08x)\n", 
+			clp->cl_clientid.cl_boot, 
+			clp->cl_clientid.cl_id);
+	list_move_tail(&clp->cl_lru, &client_lru);
+	clp->cl_time = get_seconds();
+}
+
+/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
+static int
+STALE_CLIENTID(clientid_t *clid)
+{
+	if (clid->cl_boot == boot_time)
+		return 0;
+	dprintk("NFSD stale clientid (%08x/%08x)\n", 
+			clid->cl_boot, clid->cl_id);
+	return 1;
+}
+
+/* 
+ * XXX Should we use a slab cache ?
+ * This type of memory management is somewhat inefficient, but we use it
+ * anyway since SETCLIENTID is not a common operation.
+ */
+static inline struct nfs4_client *
+alloc_client(struct xdr_netobj name)
+{
+	struct nfs4_client *clp;
+
+	if ((clp = kmalloc(sizeof(struct nfs4_client), GFP_KERNEL))!= NULL) {
+		memset(clp, 0, sizeof(*clp));
+		if ((clp->cl_name.data = kmalloc(name.len, GFP_KERNEL)) != NULL) {
+			memcpy(clp->cl_name.data, name.data, name.len);
+			clp->cl_name.len = name.len;
+		}
+		else {
+			kfree(clp);
+			clp = NULL;
+		}
+	}
+	return clp;
+}
+
+static inline void
+free_client(struct nfs4_client *clp)
+{
+	if (clp->cl_cred.cr_group_info)
+		put_group_info(clp->cl_cred.cr_group_info);
+	kfree(clp->cl_name.data);
+	kfree(clp);
+}
+
+void
+put_nfs4_client(struct nfs4_client *clp)
+{
+	if (atomic_dec_and_test(&clp->cl_count))
+		free_client(clp);
+}
+
+static void
+expire_client(struct nfs4_client *clp)
+{
+	struct nfs4_stateowner *sop;
+	struct nfs4_delegation *dp;
+	struct nfs4_callback *cb = &clp->cl_callback;
+	struct rpc_clnt *clnt = clp->cl_callback.cb_client;
+	struct list_head reaplist;
+
+	dprintk("NFSD: expire_client cl_count %d\n",
+	                    atomic_read(&clp->cl_count));
+
+	/* shutdown rpc client, ending any outstanding recall rpcs */
+	if (atomic_read(&cb->cb_set) == 1 && clnt) {
+		rpc_shutdown_client(clnt);
+		clnt = clp->cl_callback.cb_client = NULL;
+	}
+
+	INIT_LIST_HEAD(&reaplist);
+	spin_lock(&recall_lock);
+	while (!list_empty(&clp->cl_del_perclnt)) {
+		dp = list_entry(clp->cl_del_perclnt.next, struct nfs4_delegation, dl_del_perclnt);
+		dprintk("NFSD: expire client. dp %p, fp %p\n", dp,
+				dp->dl_flock);
+		list_del_init(&dp->dl_del_perclnt);
+		list_move(&dp->dl_recall_lru, &reaplist);
+	}
+	spin_unlock(&recall_lock);
+	while (!list_empty(&reaplist)) {
+		dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
+		list_del_init(&dp->dl_recall_lru);
+		unhash_delegation(dp);
+	}
+	list_del(&clp->cl_idhash);
+	list_del(&clp->cl_strhash);
+	list_del(&clp->cl_lru);
+	while (!list_empty(&clp->cl_perclient)) {
+		sop = list_entry(clp->cl_perclient.next, struct nfs4_stateowner, so_perclient);
+		release_stateowner(sop);
+	}
+	put_nfs4_client(clp);
+}
+
+static struct nfs4_client *
+create_client(struct xdr_netobj name) {
+	struct nfs4_client *clp;
+
+	if (!(clp = alloc_client(name)))
+		goto out;
+	atomic_set(&clp->cl_count, 1);
+	atomic_set(&clp->cl_callback.cb_set, 0);
+	clp->cl_callback.cb_parsed = 0;
+	INIT_LIST_HEAD(&clp->cl_idhash);
+	INIT_LIST_HEAD(&clp->cl_strhash);
+	INIT_LIST_HEAD(&clp->cl_perclient);
+	INIT_LIST_HEAD(&clp->cl_del_perclnt);
+	INIT_LIST_HEAD(&clp->cl_lru);
+out:
+	return clp;
+}
+
+static void
+copy_verf(struct nfs4_client *target, nfs4_verifier *source) {
+	memcpy(target->cl_verifier.data, source->data, sizeof(target->cl_verifier.data));
+}
+
+static void
+copy_clid(struct nfs4_client *target, struct nfs4_client *source) {
+	target->cl_clientid.cl_boot = source->cl_clientid.cl_boot; 
+	target->cl_clientid.cl_id = source->cl_clientid.cl_id; 
+}
+
+static void
+copy_cred(struct svc_cred *target, struct svc_cred *source) {
+
+	target->cr_uid = source->cr_uid;
+	target->cr_gid = source->cr_gid;
+	target->cr_group_info = source->cr_group_info;
+	get_group_info(target->cr_group_info);
+}
+
+static int
+cmp_name(struct xdr_netobj *n1, struct xdr_netobj *n2) {
+	if (!n1 || !n2)
+		return 0;
+	return((n1->len == n2->len) && !memcmp(n1->data, n2->data, n2->len));
+}
+
+static int
+cmp_verf(nfs4_verifier *v1, nfs4_verifier *v2) {
+	return(!memcmp(v1->data,v2->data,sizeof(v1->data)));
+}
+
+static int
+cmp_clid(clientid_t * cl1, clientid_t * cl2) {
+	return((cl1->cl_boot == cl2->cl_boot) &&
+	   	(cl1->cl_id == cl2->cl_id));
+}
+
+/* XXX what about NGROUP */
+static int
+cmp_creds(struct svc_cred *cr1, struct svc_cred *cr2){
+	return(cr1->cr_uid == cr2->cr_uid);
+
+}
+
+static void
+gen_clid(struct nfs4_client *clp) {
+	clp->cl_clientid.cl_boot = boot_time;
+	clp->cl_clientid.cl_id = current_clientid++; 
+}
+
+static void
+gen_confirm(struct nfs4_client *clp) {
+	struct timespec 	tv;
+	u32 *			p;
+
+	tv = CURRENT_TIME;
+	p = (u32 *)clp->cl_confirm.data;
+	*p++ = tv.tv_sec;
+	*p++ = tv.tv_nsec;
+}
+
+static int
+check_name(struct xdr_netobj name) {
+
+	if (name.len == 0) 
+		return 0;
+	if (name.len > NFS4_OPAQUE_LIMIT) {
+		printk("NFSD: check_name: name too long(%d)!\n", name.len);
+		return 0;
+	}
+	return 1;
+}
+
+void
+add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
+{
+	unsigned int idhashval;
+
+	list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]);
+	idhashval = clientid_hashval(clp->cl_clientid.cl_id);
+	list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]);
+	list_add_tail(&clp->cl_lru, &client_lru);
+	clp->cl_time = get_seconds();
+}
+
+void
+move_to_confirmed(struct nfs4_client *clp)
+{
+	unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
+	unsigned int strhashval;
+
+	dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
+	list_del_init(&clp->cl_strhash);
+	list_del_init(&clp->cl_idhash);
+	list_add(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
+	strhashval = clientstr_hashval(clp->cl_name.data, 
+			clp->cl_name.len);
+	list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
+	renew_client(clp);
+}
+
+static struct nfs4_client *
+find_confirmed_client(clientid_t *clid)
+{
+	struct nfs4_client *clp;
+	unsigned int idhashval = clientid_hashval(clid->cl_id);
+
+	list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) {
+		if (cmp_clid(&clp->cl_clientid, clid))
+			return clp;
+	}
+	return NULL;
+}
+
+static struct nfs4_client *
+find_unconfirmed_client(clientid_t *clid)
+{
+	struct nfs4_client *clp;
+	unsigned int idhashval = clientid_hashval(clid->cl_id);
+
+	list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) {
+		if (cmp_clid(&clp->cl_clientid, clid))
+			return clp;
+	}
+	return NULL;
+}
+
+/* a helper function for parse_callback */
+static int
+parse_octet(unsigned int *lenp, char **addrp)
+{
+	unsigned int len = *lenp;
+	char *p = *addrp;
+	int n = -1;
+	char c;
+
+	for (;;) {
+		if (!len)
+			break;
+		len--;
+		c = *p++;
+		if (c == '.')
+			break;
+		if ((c < '0') || (c > '9')) {
+			n = -1;
+			break;
+		}
+		if (n < 0)
+			n = 0;
+		n = (n * 10) + (c - '0');
+		if (n > 255) {
+			n = -1;
+			break;
+		}
+	}
+	*lenp = len;
+	*addrp = p;
+	return n;
+}
+
+/* parse and set the setclientid ipv4 callback address */
+int
+parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp)
+{
+	int temp = 0;
+	u32 cbaddr = 0;
+	u16 cbport = 0;
+	u32 addrlen = addr_len;
+	char *addr = addr_val;
+	int i, shift;
+
+	/* ipaddress */
+	shift = 24;
+	for(i = 4; i > 0  ; i--) {
+		if ((temp = parse_octet(&addrlen, &addr)) < 0) {
+			return 0;
+		}
+		cbaddr |= (temp << shift);
+		if (shift > 0)
+		shift -= 8;
+	}
+	*cbaddrp = cbaddr;
+
+	/* port */
+	shift = 8;
+	for(i = 2; i > 0  ; i--) {
+		if ((temp = parse_octet(&addrlen, &addr)) < 0) {
+			return 0;
+		}
+		cbport |= (temp << shift);
+		if (shift > 0)
+			shift -= 8;
+	}
+	*cbportp = cbport;
+	return 1;
+}
+
+void
+gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
+{
+	struct nfs4_callback *cb = &clp->cl_callback;
+
+	/* Currently, we only support tcp for the callback channel */
+	if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3))
+		goto out_err;
+
+	if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val,
+	                 &cb->cb_addr, &cb->cb_port)))
+		goto out_err;
+	cb->cb_prog = se->se_callback_prog;
+	cb->cb_ident = se->se_callback_ident;
+	cb->cb_parsed = 1;
+	return;
+out_err:
+	printk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
+		"will not receive delegations\n",
+		clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+
+	cb->cb_parsed = 0;
+	return;
+}
+
+/*
+ * RFC 3010 has a complex implmentation description of processing a 
+ * SETCLIENTID request consisting of 5 bullets, labeled as 
+ * CASE0 - CASE4 below.
+ *
+ * NOTES:
+ * 	callback information will be processed in a future patch
+ *
+ *	an unconfirmed record is added when:
+ *      NORMAL (part of CASE 4): there is no confirmed nor unconfirmed record.
+ *	CASE 1: confirmed record found with matching name, principal,
+ *		verifier, and clientid.
+ *	CASE 2: confirmed record found with matching name, principal,
+ *		and there is no unconfirmed record with matching
+ *		name and principal
+ *
+ *      an unconfirmed record is replaced when:
+ *	CASE 3: confirmed record found with matching name, principal,
+ *		and an unconfirmed record is found with matching 
+ *		name, principal, and with clientid and
+ *		confirm that does not match the confirmed record.
+ *	CASE 4: there is no confirmed record with matching name and 
+ *		principal. there is an unconfirmed record with 
+ *		matching name, principal.
+ *
+ *	an unconfirmed record is deleted when:
+ *	CASE 1: an unconfirmed record that matches input name, verifier,
+ *		and confirmed clientid.
+ *	CASE 4: any unconfirmed records with matching name and principal
+ *		that exist after an unconfirmed record has been replaced
+ *		as described above.
+ *
+ */
+int
+nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
+{
+	u32 			ip_addr = rqstp->rq_addr.sin_addr.s_addr;
+	struct xdr_netobj 	clname = { 
+		.len = setclid->se_namelen,
+		.data = setclid->se_name,
+	};
+	nfs4_verifier		clverifier = setclid->se_verf;
+	unsigned int 		strhashval;
+	struct nfs4_client *	conf, * unconf, * new, * clp;
+	int 			status;
+	
+	status = nfserr_inval;
+	if (!check_name(clname))
+		goto out;
+
+	/* 
+	 * XXX The Duplicate Request Cache (DRC) has been checked (??)
+	 * We get here on a DRC miss.
+	 */
+
+	strhashval = clientstr_hashval(clname.data, clname.len);
+
+	conf = NULL;
+	nfs4_lock_state();
+	list_for_each_entry(clp, &conf_str_hashtbl[strhashval], cl_strhash) {
+		if (!cmp_name(&clp->cl_name, &clname))
+			continue;
+		/* 
+		 * CASE 0:
+		 * clname match, confirmed, different principal
+		 * or different ip_address
+		 */
+		status = nfserr_clid_inuse;
+		if (!cmp_creds(&clp->cl_cred,&rqstp->rq_cred)) {
+			printk("NFSD: setclientid: string in use by client"
+			"(clientid %08x/%08x)\n",
+			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+			goto out;
+		}
+		if (clp->cl_addr != ip_addr) { 
+			printk("NFSD: setclientid: string in use by client"
+			"(clientid %08x/%08x)\n",
+			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+			goto out;
+		}
+
+		/* 
+	 	 * cl_name match from a previous SETCLIENTID operation
+	 	 * XXX check for additional matches?
+		 */
+		conf = clp;
+		break;
+	}
+	unconf = NULL;
+	list_for_each_entry(clp, &unconf_str_hashtbl[strhashval], cl_strhash) {
+		if (!cmp_name(&clp->cl_name, &clname))
+			continue;
+		/* cl_name match from a previous SETCLIENTID operation */
+		unconf = clp;
+		break;
+	}
+	status = nfserr_resource;
+	if (!conf) {
+		/* 
+		 * CASE 4:
+		 * placed first, because it is the normal case.
+		 */
+		if (unconf)
+			expire_client(unconf);
+		if (!(new = create_client(clname)))
+			goto out;
+		copy_verf(new, &clverifier);
+		new->cl_addr = ip_addr;
+		copy_cred(&new->cl_cred,&rqstp->rq_cred);
+		gen_clid(new);
+		gen_confirm(new);
+		gen_callback(new, setclid);
+		add_to_unconfirmed(new, strhashval);
+	} else if (cmp_verf(&conf->cl_verifier, &clverifier)) {
+		/*
+		 * CASE 1:
+		 * cl_name match, confirmed, principal match
+		 * verifier match: probable callback update
+		 *
+		 * remove any unconfirmed nfs4_client with 
+		 * matching cl_name, cl_verifier, and cl_clientid
+		 *
+		 * create and insert an unconfirmed nfs4_client with same 
+		 * cl_name, cl_verifier, and cl_clientid as existing 
+		 * nfs4_client,  but with the new callback info and a 
+		 * new cl_confirm
+		 */
+		if ((unconf) && 
+		    cmp_verf(&unconf->cl_verifier, &conf->cl_verifier) &&
+		     cmp_clid(&unconf->cl_clientid, &conf->cl_clientid)) {
+				expire_client(unconf);
+		}
+		if (!(new = create_client(clname)))
+			goto out;
+		copy_verf(new,&conf->cl_verifier);
+		new->cl_addr = ip_addr;
+		copy_cred(&new->cl_cred,&rqstp->rq_cred);
+		copy_clid(new, conf);
+		gen_confirm(new);
+		gen_callback(new, setclid);
+		add_to_unconfirmed(new,strhashval);
+	} else if (!unconf) {
+		/*
+		 * CASE 2:
+		 * clname match, confirmed, principal match
+		 * verfier does not match
+		 * no unconfirmed. create a new unconfirmed nfs4_client
+		 * using input clverifier, clname, and callback info
+		 * and generate a new cl_clientid and cl_confirm.
+		 */
+		if (!(new = create_client(clname)))
+			goto out;
+		copy_verf(new,&clverifier);
+		new->cl_addr = ip_addr;
+		copy_cred(&new->cl_cred,&rqstp->rq_cred);
+		gen_clid(new);
+		gen_confirm(new);
+		gen_callback(new, setclid);
+		add_to_unconfirmed(new, strhashval);
+	} else if (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm)) {
+		/*	
+		 * CASE3:
+		 * confirmed found (name, principal match)
+		 * confirmed verifier does not match input clverifier
+		 *
+		 * unconfirmed found (name match)
+		 * confirmed->cl_confirm != unconfirmed->cl_confirm
+		 *
+		 * remove unconfirmed.
+		 *
+		 * create an unconfirmed nfs4_client 
+		 * with same cl_name as existing confirmed nfs4_client, 
+		 * but with new callback info, new cl_clientid,
+		 * new cl_verifier and a new cl_confirm
+		 */
+		expire_client(unconf);
+		if (!(new = create_client(clname)))
+			goto out;
+		copy_verf(new,&clverifier);
+		new->cl_addr = ip_addr;
+		copy_cred(&new->cl_cred,&rqstp->rq_cred);
+		gen_clid(new);
+		gen_confirm(new);
+		gen_callback(new, setclid);
+		add_to_unconfirmed(new, strhashval);
+	} else {
+		/* No cases hit !!! */
+		status = nfserr_inval;
+		goto out;
+
+	}
+	setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
+	setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
+	memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data));
+	status = nfs_ok;
+out:
+	nfs4_unlock_state();
+	return status;
+}
+
+
+/*
+ * RFC 3010 has a complex implmentation description of processing a 
+ * SETCLIENTID_CONFIRM request consisting of 4 bullets describing
+ * processing on a DRC miss, labeled as CASE1 - CASE4 below.
+ *
+ * NOTE: callback information will be processed here in a future patch
+ */
+int
+nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm)
+{
+	u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr;
+	struct nfs4_client *clp, *conf = NULL, *unconf = NULL;
+	nfs4_verifier confirm = setclientid_confirm->sc_confirm; 
+	clientid_t * clid = &setclientid_confirm->sc_clientid;
+	int status;
+
+	if (STALE_CLIENTID(clid))
+		return nfserr_stale_clientid;
+	/* 
+	 * XXX The Duplicate Request Cache (DRC) has been checked (??)
+	 * We get here on a DRC miss.
+	 */
+
+	nfs4_lock_state();
+	clp = find_confirmed_client(clid);
+	if (clp) {
+		status = nfserr_inval;
+		/* 
+		 * Found a record for this clientid. If the IP addresses
+		 * don't match, return ERR_INVAL just as if the record had
+		 * not been found.
+		 */
+		if (clp->cl_addr != ip_addr) { 
+			printk("NFSD: setclientid: string in use by client"
+			"(clientid %08x/%08x)\n",
+			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+			goto out;
+		}
+		conf = clp;
+	}
+	clp = find_unconfirmed_client(clid);
+	if (clp) {
+		status = nfserr_inval;
+		if (clp->cl_addr != ip_addr) { 
+			printk("NFSD: setclientid: string in use by client"
+			"(clientid %08x/%08x)\n",
+			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+			goto out;
+		}
+		unconf = clp;
+	}
+	/* CASE 1: 
+	* unconf record that matches input clientid and input confirm.
+	* conf record that matches input clientid.
+	* conf  and unconf records match names, verifiers 
+	*/
+	if ((conf && unconf) && 
+	    (cmp_verf(&unconf->cl_confirm, &confirm)) &&
+	    (cmp_verf(&conf->cl_verifier, &unconf->cl_verifier)) &&
+	    (cmp_name(&conf->cl_name,&unconf->cl_name))  &&
+	    (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm))) {
+		if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred)) 
+			status = nfserr_clid_inuse;
+		else {
+			expire_client(conf);
+			clp = unconf;
+			move_to_confirmed(unconf);
+			status = nfs_ok;
+		}
+		goto out;
+	} 
+	/* CASE 2:
+	 * conf record that matches input clientid.
+	 * if unconf record that matches input clientid, then unconf->cl_name
+	 * or unconf->cl_verifier don't match the conf record.
+	 */
+	if ((conf && !unconf) || 
+	    ((conf && unconf) && 
+	     (!cmp_verf(&conf->cl_verifier, &unconf->cl_verifier) ||
+	      !cmp_name(&conf->cl_name, &unconf->cl_name)))) {
+		if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) {
+			status = nfserr_clid_inuse;
+		} else {
+			clp = conf;
+			status = nfs_ok;
+		}
+		goto out;
+	}
+	/* CASE 3:
+	 * conf record not found.
+	 * unconf record found. 
+	 * unconf->cl_confirm matches input confirm
+	 */ 
+	if (!conf && unconf && cmp_verf(&unconf->cl_confirm, &confirm)) {
+		if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
+			status = nfserr_clid_inuse;
+		} else {
+			status = nfs_ok;
+			clp = unconf;
+			move_to_confirmed(unconf);
+		}
+		goto out;
+	}
+	/* CASE 4:
+	 * conf record not found, or if conf, then conf->cl_confirm does not
+	 * match input confirm.
+	 * unconf record not found, or if unconf, then unconf->cl_confirm 
+	 * does not match input confirm.
+	 */
+	if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm))) &&
+	    (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm, &confirm)))) {
+		status = nfserr_stale_clientid;
+		goto out;
+	}
+	/* check that we have hit one of the cases...*/
+	status = nfserr_inval;
+	goto out;
+out:
+	if (!status)
+		nfsd4_probe_callback(clp);
+	nfs4_unlock_state();
+	return status;
+}
+
+/* 
+ * Open owner state (share locks)
+ */
+
+/* hash tables for nfs4_stateowner */
+#define OWNER_HASH_BITS              8
+#define OWNER_HASH_SIZE             (1 << OWNER_HASH_BITS)
+#define OWNER_HASH_MASK             (OWNER_HASH_SIZE - 1)
+
+#define ownerid_hashval(id) \
+        ((id) & OWNER_HASH_MASK)
+#define ownerstr_hashval(clientid, ownername) \
+        (((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK)
+
+static struct list_head	ownerid_hashtbl[OWNER_HASH_SIZE];
+static struct list_head	ownerstr_hashtbl[OWNER_HASH_SIZE];
+
+/* hash table for nfs4_file */
+#define FILE_HASH_BITS                   8
+#define FILE_HASH_SIZE                  (1 << FILE_HASH_BITS)
+#define FILE_HASH_MASK                  (FILE_HASH_SIZE - 1)
+/* hash table for (open)nfs4_stateid */
+#define STATEID_HASH_BITS              10
+#define STATEID_HASH_SIZE              (1 << STATEID_HASH_BITS)
+#define STATEID_HASH_MASK              (STATEID_HASH_SIZE - 1)
+
+#define file_hashval(x) \
+        hash_ptr(x, FILE_HASH_BITS)
+#define stateid_hashval(owner_id, file_id)  \
+        (((owner_id) + (file_id)) & STATEID_HASH_MASK)
+
+static struct list_head file_hashtbl[FILE_HASH_SIZE];
+static struct list_head stateid_hashtbl[STATEID_HASH_SIZE];
+
+/* OPEN Share state helper functions */
+static inline struct nfs4_file *
+alloc_init_file(struct inode *ino)
+{
+	struct nfs4_file *fp;
+	unsigned int hashval = file_hashval(ino);
+
+	if ((fp = kmalloc(sizeof(struct nfs4_file),GFP_KERNEL))) {
+		INIT_LIST_HEAD(&fp->fi_hash);
+		INIT_LIST_HEAD(&fp->fi_perfile);
+		INIT_LIST_HEAD(&fp->fi_del_perfile);
+		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
+		fp->fi_inode = igrab(ino);
+		fp->fi_id = current_fileid++;
+		alloc_file++;
+		return fp;
+	}
+	return NULL;
+}
+
+static void
+release_all_files(void)
+{
+	int i;
+	struct nfs4_file *fp;
+
+	for (i=0;i<FILE_HASH_SIZE;i++) {
+		while (!list_empty(&file_hashtbl[i])) {
+			fp = list_entry(file_hashtbl[i].next, struct nfs4_file, fi_hash);
+			/* this should never be more than once... */
+			if (!list_empty(&fp->fi_perfile) || !list_empty(&fp->fi_del_perfile)) {
+				printk("ERROR: release_all_files: file %p is open, creating dangling state !!!\n",fp);
+			}
+			release_file(fp);
+		}
+	}
+}
+
+kmem_cache_t *stateowner_slab = NULL;
+
+static int
+nfsd4_init_slabs(void)
+{
+	stateowner_slab = kmem_cache_create("nfsd4_stateowners",
+			sizeof(struct nfs4_stateowner), 0, 0, NULL, NULL);
+	if (stateowner_slab == NULL) {
+		dprintk("nfsd4: out of memory while initializing nfsv4\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static void
+nfsd4_free_slabs(void)
+{
+	int status = 0;
+
+	if (stateowner_slab)
+		status = kmem_cache_destroy(stateowner_slab);
+	stateowner_slab = NULL;
+	BUG_ON(status);
+}
+
+void
+nfs4_free_stateowner(struct kref *kref)
+{
+	struct nfs4_stateowner *sop =
+		container_of(kref, struct nfs4_stateowner, so_ref);
+	kfree(sop->so_owner.data);
+	kmem_cache_free(stateowner_slab, sop);
+}
+
+static inline struct nfs4_stateowner *
+alloc_stateowner(struct xdr_netobj *owner)
+{
+	struct nfs4_stateowner *sop;
+
+	if ((sop = kmem_cache_alloc(stateowner_slab, GFP_KERNEL))) {
+		if ((sop->so_owner.data = kmalloc(owner->len, GFP_KERNEL))) {
+			memcpy(sop->so_owner.data, owner->data, owner->len);
+			sop->so_owner.len = owner->len;
+			kref_init(&sop->so_ref);
+			return sop;
+		} 
+		kmem_cache_free(stateowner_slab, sop);
+	}
+	return NULL;
+}
+
+static struct nfs4_stateowner *
+alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) {
+	struct nfs4_stateowner *sop;
+	struct nfs4_replay *rp;
+	unsigned int idhashval;
+
+	if (!(sop = alloc_stateowner(&open->op_owner)))
+		return NULL;
+	idhashval = ownerid_hashval(current_ownerid);
+	INIT_LIST_HEAD(&sop->so_idhash);
+	INIT_LIST_HEAD(&sop->so_strhash);
+	INIT_LIST_HEAD(&sop->so_perclient);
+	INIT_LIST_HEAD(&sop->so_perfilestate);
+	INIT_LIST_HEAD(&sop->so_perlockowner);  /* not used */
+	INIT_LIST_HEAD(&sop->so_close_lru);
+	sop->so_time = 0;
+	list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]);
+	list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]);
+	list_add(&sop->so_perclient, &clp->cl_perclient);
+	add_perclient++;
+	sop->so_is_open_owner = 1;
+	sop->so_id = current_ownerid++;
+	sop->so_client = clp;
+	sop->so_seqid = open->op_seqid;
+	sop->so_confirmed = 0;
+	rp = &sop->so_replay;
+	rp->rp_status = NFSERR_SERVERFAULT;
+	rp->rp_buflen = 0;
+	rp->rp_buf = rp->rp_ibuf;
+	return sop;
+}
+
+static void
+release_stateid_lockowners(struct nfs4_stateid *open_stp)
+{
+	struct nfs4_stateowner *lock_sop;
+
+	while (!list_empty(&open_stp->st_perlockowner)) {
+		lock_sop = list_entry(open_stp->st_perlockowner.next,
+				struct nfs4_stateowner, so_perlockowner);
+		/* list_del(&open_stp->st_perlockowner);  */
+		BUG_ON(lock_sop->so_is_open_owner);
+		release_stateowner(lock_sop);
+	}
+}
+
+static void
+unhash_stateowner(struct nfs4_stateowner *sop)
+{
+	struct nfs4_stateid *stp;
+
+	list_del(&sop->so_idhash);
+	list_del(&sop->so_strhash);
+	if (sop->so_is_open_owner) {
+		list_del(&sop->so_perclient);
+		del_perclient++;
+	}
+	list_del(&sop->so_perlockowner);
+	while (!list_empty(&sop->so_perfilestate)) {
+		stp = list_entry(sop->so_perfilestate.next, 
+			struct nfs4_stateid, st_perfilestate);
+		if (sop->so_is_open_owner)
+			release_stateid(stp, OPEN_STATE);
+		else
+			release_stateid(stp, LOCK_STATE);
+	}
+}
+
+static void
+release_stateowner(struct nfs4_stateowner *sop)
+{
+	unhash_stateowner(sop);
+	list_del(&sop->so_close_lru);
+	nfs4_put_stateowner(sop);
+}
+
+static inline void
+init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
+	struct nfs4_stateowner *sop = open->op_stateowner;
+	unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);
+
+	INIT_LIST_HEAD(&stp->st_hash);
+	INIT_LIST_HEAD(&stp->st_perfilestate);
+	INIT_LIST_HEAD(&stp->st_perlockowner);
+	INIT_LIST_HEAD(&stp->st_perfile);
+	list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
+	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
+	list_add_perfile++;
+	list_add(&stp->st_perfile, &fp->fi_perfile);
+	stp->st_stateowner = sop;
+	stp->st_file = fp;
+	stp->st_stateid.si_boot = boot_time;
+	stp->st_stateid.si_stateownerid = sop->so_id;
+	stp->st_stateid.si_fileid = fp->fi_id;
+	stp->st_stateid.si_generation = 0;
+	stp->st_access_bmap = 0;
+	stp->st_deny_bmap = 0;
+	__set_bit(open->op_share_access, &stp->st_access_bmap);
+	__set_bit(open->op_share_deny, &stp->st_deny_bmap);
+}
+
+static void
+release_stateid(struct nfs4_stateid *stp, int flags)
+{
+	struct file *filp = stp->st_vfs_file;
+
+	list_del(&stp->st_hash);
+	list_del_perfile++;
+	list_del(&stp->st_perfile);
+	list_del(&stp->st_perfilestate);
+	if (flags & OPEN_STATE) {
+		release_stateid_lockowners(stp);
+		stp->st_vfs_file = NULL;
+		nfsd_close(filp);
+		vfsclose++;
+	} else if (flags & LOCK_STATE)
+		locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner);
+	kfree(stp);
+	stp = NULL;
+}
+
+static void
+release_file(struct nfs4_file *fp)
+{
+	free_file++;
+	list_del(&fp->fi_hash);
+	iput(fp->fi_inode);
+	kfree(fp);
+}	
+
+void
+move_to_close_lru(struct nfs4_stateowner *sop)
+{
+	dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);
+
+	unhash_stateowner(sop);
+	list_add_tail(&sop->so_close_lru, &close_lru);
+	sop->so_time = get_seconds();
+}
+
+void
+release_state_owner(struct nfs4_stateid *stp, int flag)
+{
+	struct nfs4_stateowner *sop = stp->st_stateowner;
+	struct nfs4_file *fp = stp->st_file;
+
+	dprintk("NFSD: release_state_owner\n");
+	release_stateid(stp, flag);
+
+	/* place unused nfs4_stateowners on so_close_lru list to be
+	 * released by the laundromat service after the lease period
+	 * to enable us to handle CLOSE replay
+	 */
+	if (sop->so_confirmed && list_empty(&sop->so_perfilestate))
+		move_to_close_lru(sop);
+	/* unused nfs4_file's are releseed. XXX slab cache? */
+	if (list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile)) {
+		release_file(fp);
+	}
+}
+
+static int
+cmp_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, clientid_t *clid) {
+	return ((sop->so_owner.len == owner->len) && 
+	 !memcmp(sop->so_owner.data, owner->data, owner->len) && 
+	  (sop->so_client->cl_clientid.cl_id == clid->cl_id));
+}
+
+static struct nfs4_stateowner *
+find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open)
+{
+	struct nfs4_stateowner *so = NULL;
+
+	list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
+		if (cmp_owner_str(so, &open->op_owner, &open->op_clientid))
+			return so;
+	}
+	return NULL;
+}
+
+/* search file_hashtbl[] for file */
+static struct nfs4_file *
+find_file(struct inode *ino)
+{
+	unsigned int hashval = file_hashval(ino);
+	struct nfs4_file *fp;
+
+	list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
+		if (fp->fi_inode == ino)
+			return fp;
+	}
+	return NULL;
+}
+
+#define TEST_ACCESS(x) ((x > 0 || x < 4)?1:0)
+#define TEST_DENY(x) ((x >= 0 || x < 5)?1:0)
+
+void
+set_access(unsigned int *access, unsigned long bmap) {
+	int i;
+
+	*access = 0;
+	for (i = 1; i < 4; i++) {
+		if (test_bit(i, &bmap))
+			*access |= i;
+	}
+}
+
+void
+set_deny(unsigned int *deny, unsigned long bmap) {
+	int i;
+
+	*deny = 0;
+	for (i = 0; i < 4; i++) {
+		if (test_bit(i, &bmap))
+			*deny |= i ;
+	}
+}
+
+static int
+test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) {
+	unsigned int access, deny;
+
+	set_access(&access, stp->st_access_bmap);
+	set_deny(&deny, stp->st_deny_bmap);
+	if ((access & open->op_share_deny) || (deny & open->op_share_access))
+		return 0;
+	return 1;
+}
+
+/*
+ * Called to check deny when READ with all zero stateid or
+ * WRITE with all zero or all one stateid
+ */
+int
+nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
+{
+	struct inode *ino = current_fh->fh_dentry->d_inode;
+	struct nfs4_file *fp;
+	struct nfs4_stateid *stp;
+
+	dprintk("NFSD: nfs4_share_conflict\n");
+
+	fp = find_file(ino);
+	if (fp) {
+	/* Search for conflicting share reservations */
+		list_for_each_entry(stp, &fp->fi_perfile, st_perfile) {
+			if (test_bit(deny_type, &stp->st_deny_bmap) ||
+			    test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap))
+				return nfserr_share_denied;
+		}
+	}
+	return nfs_ok;
+}
+
+static inline void
+nfs4_file_downgrade(struct file *filp, unsigned int share_access)
+{
+	if (share_access & NFS4_SHARE_ACCESS_WRITE) {
+		put_write_access(filp->f_dentry->d_inode);
+		filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE;
+	}
+}
+
+/*
+ * Recall a delegation
+ */
+static int
+do_recall(void *__dp)
+{
+	struct nfs4_delegation *dp = __dp;
+
+	daemonize("nfsv4-recall");
+
+	nfsd4_cb_recall(dp);
+	return 0;
+}
+
+/*
+ * Spawn a thread to perform a recall on the delegation represented
+ * by the lease (file_lock)
+ *
+ * Called from break_lease() with lock_kernel() held.
+ * Note: we assume break_lease will only call this *once* for any given
+ * lease.
+ */
+static
+void nfsd_break_deleg_cb(struct file_lock *fl)
+{
+	struct nfs4_delegation *dp=  (struct nfs4_delegation *)fl->fl_owner;
+	struct task_struct *t;
+
+	dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl);
+	if (!dp)
+		return;
+
+	/* We're assuming the state code never drops its reference
+	 * without first removing the lease.  Since we're in this lease
+	 * callback (and since the lease code is serialized by the kernel
+	 * lock) we know the server hasn't removed the lease yet, we know
+	 * it's safe to take a reference: */
+	atomic_inc(&dp->dl_count);
+
+	spin_lock(&recall_lock);
+	list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
+	spin_unlock(&recall_lock);
+
+	/* only place dl_time is set. protected by lock_kernel*/
+	dp->dl_time = get_seconds();
+
+	/* XXX need to merge NFSD_LEASE_TIME with fs/locks.c:lease_break_time */
+	fl->fl_break_time = jiffies + NFSD_LEASE_TIME * HZ;
+
+	t = kthread_run(do_recall, dp, "%s", "nfs4_cb_recall");
+	if (IS_ERR(t)) {
+		struct nfs4_client *clp = dp->dl_client;
+
+		printk(KERN_INFO "NFSD: Callback thread failed for "
+			"for client (clientid %08x/%08x)\n",
+			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+		nfs4_put_delegation(dp);
+	}
+}
+
+/*
+ * The file_lock is being reapd.
+ *
+ * Called by locks_free_lock() with lock_kernel() held.
+ */
+static
+void nfsd_release_deleg_cb(struct file_lock *fl)
+{
+	struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
+
+	dprintk("NFSD nfsd_release_deleg_cb: fl %p dp %p dl_count %d\n", fl,dp, atomic_read(&dp->dl_count));
+
+	if (!(fl->fl_flags & FL_LEASE) || !dp)
+		return;
+	dp->dl_flock = NULL;
+}
+
+/*
+ * Set the delegation file_lock back pointer.
+ *
+ * Called from __setlease() with lock_kernel() held.
+ */
+static
+void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl)
+{
+	struct nfs4_delegation *dp = (struct nfs4_delegation *)new->fl_owner;
+
+	dprintk("NFSD: nfsd_copy_lock_deleg_cb: new fl %p dp %p\n", new, dp);
+	if (!dp)
+		return;
+	dp->dl_flock = new;
+}
+
+/*
+ * Called from __setlease() with lock_kernel() held
+ */
+static
+int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try)
+{
+	struct nfs4_delegation *onlistd =
+		(struct nfs4_delegation *)onlist->fl_owner;
+	struct nfs4_delegation *tryd =
+		(struct nfs4_delegation *)try->fl_owner;
+
+	if (onlist->fl_lmops != try->fl_lmops)
+		return 0;
+
+	return onlistd->dl_client == tryd->dl_client;
+}
+
+
+static
+int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
+{
+	if (arg & F_UNLCK)
+		return lease_modify(onlist, arg);
+	else
+		return -EAGAIN;
+}
+
+struct lock_manager_operations nfsd_lease_mng_ops = {
+	.fl_break = nfsd_break_deleg_cb,
+	.fl_release_private = nfsd_release_deleg_cb,
+	.fl_copy_lock = nfsd_copy_lock_deleg_cb,
+	.fl_mylease = nfsd_same_client_deleg_cb,
+	.fl_change = nfsd_change_deleg_cb,
+};
+
+
+/*
+ * nfsd4_process_open1()
+ * 	lookup stateowner.
+ * 		found:
+ * 			check confirmed 
+ * 				confirmed:
+ * 					check seqid
+ * 				not confirmed:
+ * 					delete owner
+ * 					create new owner
+ * 		notfound:
+ * 			verify clientid
+ * 			create new owner
+ *
+ * called with nfs4_lock_state() held.
+ */
+int
+nfsd4_process_open1(struct nfsd4_open *open)
+{
+	int status;
+	clientid_t *clientid = &open->op_clientid;
+	struct nfs4_client *clp = NULL;
+	unsigned int strhashval;
+	struct nfs4_stateowner *sop = NULL;
+
+	status = nfserr_inval;
+	if (!check_name(open->op_owner))
+		goto out;
+
+	if (STALE_CLIENTID(&open->op_clientid))
+		return nfserr_stale_clientid;
+
+	strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner);
+	sop = find_openstateowner_str(strhashval, open);
+	if (sop) {
+		open->op_stateowner = sop;
+		/* check for replay */
+		if (open->op_seqid == sop->so_seqid){
+			if (sop->so_replay.rp_buflen)
+				return NFSERR_REPLAY_ME;
+			else {
+				/* The original OPEN failed so spectacularly
+				 * that we don't even have replay data saved!
+				 * Therefore, we have no choice but to continue
+				 * processing this OPEN; presumably, we'll
+				 * fail again for the same reason.
+				 */
+				dprintk("nfsd4_process_open1:"
+					" replay with no replay cache\n");
+				goto renew;
+			}
+		} else if (sop->so_confirmed) {
+			if (open->op_seqid == sop->so_seqid + 1)
+				goto renew;
+			status = nfserr_bad_seqid;
+			goto out;
+		} else {
+			/* If we get here, we received an OPEN for an
+			 * unconfirmed nfs4_stateowner. Since the seqid's are
+			 * different, purge the existing nfs4_stateowner, and
+			 * instantiate a new one.
+			 */
+			clp = sop->so_client;
+			release_stateowner(sop);
+		}
+	} else {
+		/* nfs4_stateowner not found.
+		 * Verify clientid and instantiate new nfs4_stateowner.
+		 * If verify fails this is presumably the result of the
+		 * client's lease expiring.
+		 */
+		status = nfserr_expired;
+		clp = find_confirmed_client(clientid);
+		if (clp == NULL)
+			goto out;
+	}
+	status = nfserr_resource;
+	sop = alloc_init_open_stateowner(strhashval, clp, open);
+	if (sop == NULL)
+		goto out;
+	open->op_stateowner = sop;
+renew:
+	status = nfs_ok;
+	renew_client(sop->so_client);
+out:
+	if (status && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
+		status = nfserr_reclaim_bad;
+	return status;
+}
+
+static int
+nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp)
+{
+	struct nfs4_stateid *local;
+	int status = nfserr_share_denied;
+	struct nfs4_stateowner *sop = open->op_stateowner;
+
+	list_for_each_entry(local, &fp->fi_perfile, st_perfile) {
+		/* ignore lock owners */
+		if (local->st_stateowner->so_is_open_owner == 0)
+			continue;
+		/* remember if we have seen this open owner */
+		if (local->st_stateowner == sop)
+			*stpp = local;
+		/* check for conflicting share reservations */
+		if (!test_share(local, open))
+			goto out;
+	}
+	status = 0;
+out:
+	return status;
+}
+
+static int
+nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
+		struct svc_fh *cur_fh, int flags)
+{
+	struct nfs4_stateid *stp;
+	int status;
+
+	stp = kmalloc(sizeof(struct nfs4_stateid), GFP_KERNEL);
+	if (stp == NULL)
+		return nfserr_resource;
+
+	status = nfsd_open(rqstp, cur_fh, S_IFREG, flags, &stp->st_vfs_file);
+	if (status) {
+		if (status == nfserr_dropit)
+			status = nfserr_jukebox;
+		kfree(stp);
+		return status;
+	}
+	vfsopen++;
+	*stpp = stp;
+	return 0;
+}
+
+static inline int
+nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
+		struct nfsd4_open *open)
+{
+	struct iattr iattr = {
+		.ia_valid = ATTR_SIZE,
+		.ia_size = 0,
+	};
+	if (!open->op_truncate)
+		return 0;
+	if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
+		return -EINVAL;
+	return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
+}
+
+static int
+nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open)
+{
+	struct file *filp = stp->st_vfs_file;
+	struct inode *inode = filp->f_dentry->d_inode;
+	unsigned int share_access;
+	int status;
+
+	set_access(&share_access, stp->st_access_bmap);
+	share_access = ~share_access;
+	share_access &= open->op_share_access;
+
+	if (!(share_access & NFS4_SHARE_ACCESS_WRITE))
+		return nfsd4_truncate(rqstp, cur_fh, open);
+
+	status = get_write_access(inode);
+	if (status)
+		return nfserrno(status);
+	status = nfsd4_truncate(rqstp, cur_fh, open);
+	if (status) {
+		put_write_access(inode);
+		return status;
+	}
+	/* remember the open */
+	filp->f_mode = (filp->f_mode | FMODE_WRITE) & ~FMODE_READ;
+	set_bit(open->op_share_access, &stp->st_access_bmap);
+	set_bit(open->op_share_deny, &stp->st_deny_bmap);
+
+	return nfs_ok;
+}
+
+
+/* decrement seqid on successful reclaim, it will be bumped in encode_open */
+static void
+nfs4_set_claim_prev(struct nfsd4_open *open, int *status)
+{
+	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) {
+		if (*status)
+			*status = nfserr_reclaim_bad;
+		else {
+			open->op_stateowner->so_confirmed = 1;
+			open->op_stateowner->so_seqid--;
+		}
+	}
+}
+
+/*
+ * Attempt to hand out a delegation.
+ */
+static void
+nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_stateid *stp)
+{
+	struct nfs4_delegation *dp;
+	struct nfs4_stateowner *sop = stp->st_stateowner;
+	struct nfs4_callback *cb = &sop->so_client->cl_callback;
+	struct file_lock fl, *flp = &fl;
+	int status, flag = 0;
+
+	flag = NFS4_OPEN_DELEGATE_NONE;
+	if (open->op_claim_type != NFS4_OPEN_CLAIM_NULL
+	     || !atomic_read(&cb->cb_set) || !sop->so_confirmed)
+		goto out;
+
+	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+		flag = NFS4_OPEN_DELEGATE_WRITE;
+	else
+		flag = NFS4_OPEN_DELEGATE_READ;
+
+	dp = alloc_init_deleg(sop->so_client, stp, fh, flag);
+	if (dp == NULL) {
+		flag = NFS4_OPEN_DELEGATE_NONE;
+		goto out;
+	}
+	locks_init_lock(&fl);
+	fl.fl_lmops = &nfsd_lease_mng_ops;
+	fl.fl_flags = FL_LEASE;
+	fl.fl_end = OFFSET_MAX;
+	fl.fl_owner =  (fl_owner_t)dp;
+	fl.fl_file = stp->st_vfs_file;
+	fl.fl_pid = current->tgid;
+
+	/* setlease checks to see if delegation should be handed out.
+	 * the lock_manager callbacks fl_mylease and fl_change are used
+	 */
+	if ((status = setlease(stp->st_vfs_file,
+		flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK, &flp))) {
+		dprintk("NFSD: setlease failed [%d], no delegation\n", status);
+		list_del(&dp->dl_del_perfile);
+		list_del(&dp->dl_del_perclnt);
+		nfs4_put_delegation(dp);
+		free_delegation++;
+		flag = NFS4_OPEN_DELEGATE_NONE;
+		goto out;
+	}
+
+	memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
+
+	dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n",
+	             dp->dl_stateid.si_boot,
+	             dp->dl_stateid.si_stateownerid,
+	             dp->dl_stateid.si_fileid,
+	             dp->dl_stateid.si_generation);
+out:
+	open->op_delegate_type = flag;
+}
+
+/*
+ * called with nfs4_lock_state() held.
+ */
+int
+nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+{
+	struct nfs4_file *fp = NULL;
+	struct inode *ino = current_fh->fh_dentry->d_inode;
+	struct nfs4_stateid *stp = NULL;
+	int status;
+
+	status = nfserr_inval;
+	if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny))
+		goto out;
+	/*
+	 * Lookup file; if found, lookup stateid and check open request,
+	 * and check for delegations in the process of being recalled.
+	 * If not found, create the nfs4_file struct
+	 */
+	fp = find_file(ino);
+	if (fp) {
+		if ((status = nfs4_check_open(fp, open, &stp)))
+			goto out;
+	} else {
+		status = nfserr_resource;
+		fp = alloc_init_file(ino);
+		if (fp == NULL)
+			goto out;
+	}
+
+	/*
+	 * OPEN the file, or upgrade an existing OPEN.
+	 * If truncate fails, the OPEN fails.
+	 */
+	if (stp) {
+		/* Stateid was found, this is an OPEN upgrade */
+		status = nfs4_upgrade_open(rqstp, current_fh, stp, open);
+		if (status)
+			goto out;
+	} else {
+		/* Stateid was not found, this is a new OPEN */
+		int flags = 0;
+		if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+			flags = MAY_WRITE;
+		else
+			flags = MAY_READ;
+		if ((status = nfs4_new_open(rqstp, &stp, current_fh, flags)))
+			goto out;
+		init_stateid(stp, fp, open);
+		status = nfsd4_truncate(rqstp, current_fh, open);
+		if (status) {
+			release_stateid(stp, OPEN_STATE);
+			goto out;
+		}
+	}
+	memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t));
+
+	/*
+	* Attempt to hand out a delegation. No error return, because the
+	* OPEN succeeds even if we fail.
+	*/
+	nfs4_open_delegation(current_fh, open, stp);
+
+	status = nfs_ok;
+
+	dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n",
+	            stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
+	            stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
+out:
+	/* take the opportunity to clean up unused state */
+	if (fp && list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile))
+		release_file(fp);
+
+	/* CLAIM_PREVIOUS has different error returns */
+	nfs4_set_claim_prev(open, &status);
+	/*
+	* To finish the open response, we just need to set the rflags.
+	*/
+	open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
+	if (!open->op_stateowner->so_confirmed)
+		open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
+
+	return status;
+}
+
+static struct work_struct laundromat_work;
+static void laundromat_main(void *);
+static DECLARE_WORK(laundromat_work, laundromat_main, NULL);
+
+int 
+nfsd4_renew(clientid_t *clid)
+{
+	struct nfs4_client *clp;
+	int status;
+
+	nfs4_lock_state();
+	dprintk("process_renew(%08x/%08x): starting\n", 
+			clid->cl_boot, clid->cl_id);
+	status = nfserr_stale_clientid;
+	if (STALE_CLIENTID(clid))
+		goto out;
+	clp = find_confirmed_client(clid);
+	status = nfserr_expired;
+	if (clp == NULL) {
+		/* We assume the client took too long to RENEW. */
+		dprintk("nfsd4_renew: clientid not found!\n");
+		goto out;
+	}
+	renew_client(clp);
+	status = nfserr_cb_path_down;
+	if (!list_empty(&clp->cl_del_perclnt)
+			&& !atomic_read(&clp->cl_callback.cb_set))
+		goto out;
+	status = nfs_ok;
+out:
+	nfs4_unlock_state();
+	return status;
+}
+
+time_t
+nfs4_laundromat(void)
+{
+	struct nfs4_client *clp;
+	struct nfs4_stateowner *sop;
+	struct nfs4_delegation *dp;
+	struct list_head *pos, *next, reaplist;
+	time_t cutoff = get_seconds() - NFSD_LEASE_TIME;
+	time_t t, clientid_val = NFSD_LEASE_TIME;
+	time_t u, test_val = NFSD_LEASE_TIME;
+
+	nfs4_lock_state();
+
+	dprintk("NFSD: laundromat service - starting\n");
+	list_for_each_safe(pos, next, &client_lru) {
+		clp = list_entry(pos, struct nfs4_client, cl_lru);
+		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
+			t = clp->cl_time - cutoff;
+			if (clientid_val > t)
+				clientid_val = t;
+			break;
+		}
+		dprintk("NFSD: purging unused client (clientid %08x)\n",
+			clp->cl_clientid.cl_id);
+		expire_client(clp);
+	}
+	INIT_LIST_HEAD(&reaplist);
+	spin_lock(&recall_lock);
+	list_for_each_safe(pos, next, &del_recall_lru) {
+		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+		if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
+			u = dp->dl_time - cutoff;
+			if (test_val > u)
+				test_val = u;
+			break;
+		}
+		dprintk("NFSD: purging unused delegation dp %p, fp %p\n",
+			            dp, dp->dl_flock);
+		list_move(&dp->dl_recall_lru, &reaplist);
+	}
+	spin_unlock(&recall_lock);
+	list_for_each_safe(pos, next, &reaplist) {
+		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+		list_del_init(&dp->dl_recall_lru);
+		unhash_delegation(dp);
+	}
+	test_val = NFSD_LEASE_TIME;
+	list_for_each_safe(pos, next, &close_lru) {
+		sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
+		if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) {
+			u = sop->so_time - cutoff;
+			if (test_val > u)
+				test_val = u;
+			break;
+		}
+		dprintk("NFSD: purging unused open stateowner (so_id %d)\n",
+			sop->so_id);
+		list_del(&sop->so_close_lru);
+		nfs4_put_stateowner(sop);
+	}
+	if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT)
+		clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT;
+	nfs4_unlock_state();
+	return clientid_val;
+}
+
+void
+laundromat_main(void *not_used)
+{
+	time_t t;
+
+	t = nfs4_laundromat();
+	dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
+	schedule_delayed_work(&laundromat_work, t*HZ);
+}
+
+/* search ownerid_hashtbl[] and close_lru for stateid owner
+ * (stateid->si_stateownerid)
+ */
+struct nfs4_stateowner *
+find_openstateowner_id(u32 st_id, int flags) {
+	struct nfs4_stateowner *local = NULL;
+
+	dprintk("NFSD: find_openstateowner_id %d\n", st_id);
+	if (flags & CLOSE_STATE) {
+		list_for_each_entry(local, &close_lru, so_close_lru) {
+			if (local->so_id == st_id)
+				return local;
+		}
+	}
+	return NULL;
+}
+
+static inline int
+nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp)
+{
+	return fhp->fh_dentry->d_inode != stp->st_vfs_file->f_dentry->d_inode;
+}
+
+static int
+STALE_STATEID(stateid_t *stateid)
+{
+	if (stateid->si_boot == boot_time)
+		return 0;
+	printk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n",
+		stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid,
+		stateid->si_generation);
+	return 1;
+}
+
+static inline int
+access_permit_read(unsigned long access_bmap)
+{
+	return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) ||
+		test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap) ||
+		test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap);
+}
+
+static inline int
+access_permit_write(unsigned long access_bmap)
+{
+	return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) ||
+		test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap);
+}
+
+static
+int nfs4_check_openmode(struct nfs4_stateid *stp, int flags)
+{
+        int status = nfserr_openmode;
+
+	if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap)))
+                goto out;
+	if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap)))
+                goto out;
+	status = nfs_ok;
+out:
+	return status;
+}
+
+static inline int
+nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
+{
+	if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
+		return nfserr_openmode;
+	else
+		return nfs_ok;
+}
+
+static inline int
+check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
+{
+	/* Trying to call delegreturn with a special stateid? Yuch: */
+	if (!(flags & (RD_STATE | WR_STATE)))
+		return nfserr_bad_stateid;
+	else if (ONE_STATEID(stateid) && (flags & RD_STATE))
+		return nfs_ok;
+	else if (nfs4_in_grace()) {
+		/* Answer in remaining cases depends on existance of
+		 * conflicting state; so we must wait out the grace period. */
+		return nfserr_grace;
+	} else if (flags & WR_STATE)
+		return nfs4_share_conflict(current_fh,
+				NFS4_SHARE_DENY_WRITE);
+	else /* (flags & RD_STATE) && ZERO_STATEID(stateid) */
+		return nfs4_share_conflict(current_fh,
+				NFS4_SHARE_DENY_READ);
+}
+
+/*
+ * Allow READ/WRITE during grace period on recovered state only for files
+ * that are not able to provide mandatory locking.
+ */
+static inline int
+io_during_grace_disallowed(struct inode *inode, int flags)
+{
+	return nfs4_in_grace() && (flags & (RD_STATE | WR_STATE))
+		&& MANDATORY_LOCK(inode);
+}
+
+/*
+* Checks for stateid operations
+*/
+int
+nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct file **filpp)
+{
+	struct nfs4_stateid *stp = NULL;
+	struct nfs4_delegation *dp = NULL;
+	stateid_t *stidp;
+	struct inode *ino = current_fh->fh_dentry->d_inode;
+	int status;
+
+	dprintk("NFSD: preprocess_stateid_op: stateid = (%08x/%08x/%08x/%08x)\n",
+		stateid->si_boot, stateid->si_stateownerid, 
+		stateid->si_fileid, stateid->si_generation); 
+	if (filpp)
+		*filpp = NULL;
+
+	if (io_during_grace_disallowed(ino, flags))
+		return nfserr_grace;
+
+	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
+		return check_special_stateids(current_fh, stateid, flags);
+
+	/* STALE STATEID */
+	status = nfserr_stale_stateid;
+	if (STALE_STATEID(stateid)) 
+		goto out;
+
+	/* BAD STATEID */
+	status = nfserr_bad_stateid;
+	if (!stateid->si_fileid) { /* delegation stateid */
+		if(!(dp = find_delegation_stateid(ino, stateid))) {
+			dprintk("NFSD: delegation stateid not found\n");
+			if (nfs4_in_grace())
+				status = nfserr_grace;
+			goto out;
+		}
+		stidp = &dp->dl_stateid;
+	} else { /* open or lock stateid */
+		if (!(stp = find_stateid(stateid, flags))) {
+			dprintk("NFSD: open or lock stateid not found\n");
+			if (nfs4_in_grace())
+				status = nfserr_grace;
+			goto out;
+		}
+		if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp))
+			goto out;
+		if (!stp->st_stateowner->so_confirmed)
+			goto out;
+		stidp = &stp->st_stateid;
+	}
+	if (stateid->si_generation > stidp->si_generation)
+		goto out;
+
+	/* OLD STATEID */
+	status = nfserr_old_stateid;
+	if (stateid->si_generation < stidp->si_generation)
+		goto out;
+	if (stp) {
+		if ((status = nfs4_check_openmode(stp,flags)))
+			goto out;
+		renew_client(stp->st_stateowner->so_client);
+		if (filpp)
+			*filpp = stp->st_vfs_file;
+	} else if (dp) {
+		if ((status = nfs4_check_delegmode(dp, flags)))
+			goto out;
+		renew_client(dp->dl_client);
+		if (flags & DELEG_RET)
+			unhash_delegation(dp);
+		if (filpp)
+			*filpp = dp->dl_vfs_file;
+	}
+	status = nfs_ok;
+out:
+	return status;
+}
+
+
+/* 
+ * Checks for sequence id mutating operations. 
+ */
+int
+nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, clientid_t *lockclid)
+{
+	int status;
+	struct nfs4_stateid *stp;
+	struct nfs4_stateowner *sop;
+
+	dprintk("NFSD: preprocess_seqid_op: seqid=%d " 
+			"stateid = (%08x/%08x/%08x/%08x)\n", seqid,
+		stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid,
+		stateid->si_generation);
+			        
+	*stpp = NULL;
+	*sopp = NULL;
+
+	status = nfserr_bad_stateid;
+	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
+		printk("NFSD: preprocess_seqid_op: magic stateid!\n");
+		goto out;
+	}
+
+	status = nfserr_stale_stateid;
+	if (STALE_STATEID(stateid))
+		goto out;
+	/*
+	* We return BAD_STATEID if filehandle doesn't match stateid, 
+	* the confirmed flag is incorrecly set, or the generation 
+	* number is incorrect.  
+	* If there is no entry in the openfile table for this id, 
+	* we can't always return BAD_STATEID;
+	* this might be a retransmitted CLOSE which has arrived after 
+	* the openfile has been released.
+	*/
+	if (!(stp = find_stateid(stateid, flags)))
+		goto no_nfs4_stateid;
+
+	status = nfserr_bad_stateid;
+
+	/* for new lock stateowners:
+	 * check that the lock->v.new.open_stateid
+	 * refers to an open stateowner
+	 *
+	 * check that the lockclid (nfs4_lock->v.new.clientid) is the same
+	 * as the open_stateid->st_stateowner->so_client->clientid
+	 */
+	if (lockclid) {
+		struct nfs4_stateowner *sop = stp->st_stateowner;
+		struct nfs4_client *clp = sop->so_client;
+
+		if (!sop->so_is_open_owner)
+			goto out;
+		if (!cmp_clid(&clp->cl_clientid, lockclid))
+			goto out;
+	}
+
+	if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) {
+		printk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n");
+		goto out;
+	}
+
+	*stpp = stp;
+	*sopp = sop = stp->st_stateowner;
+
+	/*
+	*  We now validate the seqid and stateid generation numbers.
+	*  For the moment, we ignore the possibility of 
+	*  generation number wraparound.
+	*/
+	if (seqid != sop->so_seqid + 1)
+		goto check_replay;
+
+	if (sop->so_confirmed) {
+		if (flags & CONFIRM) {
+			printk("NFSD: preprocess_seqid_op: expected unconfirmed stateowner!\n");
+			goto out;
+		}
+	}
+	else {
+		if (!(flags & CONFIRM)) {
+			printk("NFSD: preprocess_seqid_op: stateowner not confirmed yet!\n");
+			goto out;
+		}
+	}
+	if (stateid->si_generation > stp->st_stateid.si_generation) {
+		printk("NFSD: preprocess_seqid_op: future stateid?!\n");
+		goto out;
+	}
+
+	status = nfserr_old_stateid;
+	if (stateid->si_generation < stp->st_stateid.si_generation) {
+		printk("NFSD: preprocess_seqid_op: old stateid!\n");
+		goto out;
+	}
+	/* XXX renew the client lease here */
+	status = nfs_ok;
+
+out:
+	return status;
+
+no_nfs4_stateid:
+
+	/*
+	* We determine whether this is a bad stateid or a replay, 
+	* starting by trying to look up the stateowner.
+	* If stateowner is not found - stateid is bad.
+	*/
+	if (!(sop = find_openstateowner_id(stateid->si_stateownerid, flags))) {
+		printk("NFSD: preprocess_seqid_op: no stateowner or nfs4_stateid!\n");
+		status = nfserr_bad_stateid;
+		goto out;
+	}
+	*sopp = sop;
+
+check_replay:
+	if (seqid == sop->so_seqid) {
+		printk("NFSD: preprocess_seqid_op: retransmission?\n");
+		/* indicate replay to calling function */
+		status = NFSERR_REPLAY_ME;
+	} else  {
+		printk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d\n", sop->so_seqid +1, seqid);
+
+		*sopp = NULL;
+		status = nfserr_bad_seqid;
+	}
+	goto out;
+}
+
+int
+nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc)
+{
+	int status;
+	struct nfs4_stateowner *sop;
+	struct nfs4_stateid *stp;
+
+	dprintk("NFSD: nfsd4_open_confirm on file %.*s\n",
+			(int)current_fh->fh_dentry->d_name.len,
+			current_fh->fh_dentry->d_name.name);
+
+	if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0)))
+		goto out;
+
+	nfs4_lock_state();
+
+	if ((status = nfs4_preprocess_seqid_op(current_fh, oc->oc_seqid,
+					&oc->oc_req_stateid,
+					CHECK_FH | CONFIRM | OPEN_STATE,
+					&oc->oc_stateowner, &stp, NULL)))
+		goto out; 
+
+	sop = oc->oc_stateowner;
+	sop->so_confirmed = 1;
+	update_stateid(&stp->st_stateid);
+	memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t));
+	dprintk("NFSD: nfsd4_open_confirm: success, seqid=%d " 
+		"stateid=(%08x/%08x/%08x/%08x)\n", oc->oc_seqid,
+		         stp->st_stateid.si_boot,
+		         stp->st_stateid.si_stateownerid,
+		         stp->st_stateid.si_fileid,
+		         stp->st_stateid.si_generation);
+out:
+	if (oc->oc_stateowner)
+		nfs4_get_stateowner(oc->oc_stateowner);
+	nfs4_unlock_state();
+	return status;
+}
+
+
+/*
+ * unset all bits in union bitmap (bmap) that
+ * do not exist in share (from successful OPEN_DOWNGRADE)
+ */
+static void
+reset_union_bmap_access(unsigned long access, unsigned long *bmap)
+{
+	int i;
+	for (i = 1; i < 4; i++) {
+		if ((i & access) != i)
+			__clear_bit(i, bmap);
+	}
+}
+
+static void
+reset_union_bmap_deny(unsigned long deny, unsigned long *bmap)
+{
+	int i;
+	for (i = 0; i < 4; i++) {
+		if ((i & deny) != i)
+			__clear_bit(i, bmap);
+	}
+}
+
+int
+nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_downgrade *od)
+{
+	int status;
+	struct nfs4_stateid *stp;
+	unsigned int share_access;
+
+	dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", 
+			(int)current_fh->fh_dentry->d_name.len,
+			current_fh->fh_dentry->d_name.name);
+
+	if (!TEST_ACCESS(od->od_share_access) || !TEST_DENY(od->od_share_deny))
+		return nfserr_inval;
+
+	nfs4_lock_state();
+	if ((status = nfs4_preprocess_seqid_op(current_fh, od->od_seqid, 
+					&od->od_stateid, 
+					CHECK_FH | OPEN_STATE, 
+					&od->od_stateowner, &stp, NULL)))
+		goto out; 
+
+	status = nfserr_inval;
+	if (!test_bit(od->od_share_access, &stp->st_access_bmap)) {
+		dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n",
+			stp->st_access_bmap, od->od_share_access);
+		goto out;
+	}
+	if (!test_bit(od->od_share_deny, &stp->st_deny_bmap)) {
+		dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n",
+			stp->st_deny_bmap, od->od_share_deny);
+		goto out;
+	}
+	set_access(&share_access, stp->st_access_bmap);
+	nfs4_file_downgrade(stp->st_vfs_file,
+	                    share_access & ~od->od_share_access);
+
+	reset_union_bmap_access(od->od_share_access, &stp->st_access_bmap);
+	reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap);
+
+	update_stateid(&stp->st_stateid);
+	memcpy(&od->od_stateid, &stp->st_stateid, sizeof(stateid_t));
+	status = nfs_ok;
+out:
+	if (od->od_stateowner)
+		nfs4_get_stateowner(od->od_stateowner);
+	nfs4_unlock_state();
+	return status;
+}
+
+/*
+ * nfs4_unlock_state() called after encode
+ */
+int
+nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_close *close)
+{
+	int status;
+	struct nfs4_stateid *stp;
+
+	dprintk("NFSD: nfsd4_close on file %.*s\n", 
+			(int)current_fh->fh_dentry->d_name.len,
+			current_fh->fh_dentry->d_name.name);
+
+	nfs4_lock_state();
+	/* check close_lru for replay */
+	if ((status = nfs4_preprocess_seqid_op(current_fh, close->cl_seqid, 
+					&close->cl_stateid, 
+					CHECK_FH | OPEN_STATE | CLOSE_STATE,
+					&close->cl_stateowner, &stp, NULL)))
+		goto out; 
+	/*
+	*  Return success, but first update the stateid.
+	*/
+	status = nfs_ok;
+	update_stateid(&stp->st_stateid);
+	memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t));
+
+	/* release_state_owner() calls nfsd_close() if needed */
+	release_state_owner(stp, OPEN_STATE);
+out:
+	if (close->cl_stateowner)
+		nfs4_get_stateowner(close->cl_stateowner);
+	nfs4_unlock_state();
+	return status;
+}
+
+int
+nfsd4_delegreturn(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_delegreturn *dr)
+{
+	int status;
+
+	if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0)))
+		goto out;
+
+	nfs4_lock_state();
+	status = nfs4_preprocess_stateid_op(current_fh, &dr->dr_stateid, DELEG_RET, NULL);
+	nfs4_unlock_state();
+out:
+	return status;
+}
+
+
+/* 
+ * Lock owner state (byte-range locks)
+ */
+#define LOFF_OVERFLOW(start, len)      ((u64)(len) > ~(u64)(start))
+#define LOCK_HASH_BITS              8
+#define LOCK_HASH_SIZE             (1 << LOCK_HASH_BITS)
+#define LOCK_HASH_MASK             (LOCK_HASH_SIZE - 1)
+
+#define lockownerid_hashval(id) \
+        ((id) & LOCK_HASH_MASK)
+
+static inline unsigned int
+lock_ownerstr_hashval(struct inode *inode, u32 cl_id,
+		struct xdr_netobj *ownername)
+{
+	return (file_hashval(inode) + cl_id
+			+ opaque_hashval(ownername->data, ownername->len))
+		& LOCK_HASH_MASK;
+}
+
+static struct list_head lock_ownerid_hashtbl[LOCK_HASH_SIZE];
+static struct list_head	lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
+static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
+
+struct nfs4_stateid *
+find_stateid(stateid_t *stid, int flags)
+{
+	struct nfs4_stateid *local = NULL;
+	u32 st_id = stid->si_stateownerid;
+	u32 f_id = stid->si_fileid;
+	unsigned int hashval;
+
+	dprintk("NFSD: find_stateid flags 0x%x\n",flags);
+	if ((flags & LOCK_STATE) || (flags & RD_STATE) || (flags & WR_STATE)) {
+		hashval = stateid_hashval(st_id, f_id);
+		list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) {
+			if ((local->st_stateid.si_stateownerid == st_id) &&
+			    (local->st_stateid.si_fileid == f_id))
+				return local;
+		}
+	} 
+	if ((flags & OPEN_STATE) || (flags & RD_STATE) || (flags & WR_STATE)) {
+		hashval = stateid_hashval(st_id, f_id);
+		list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) {
+			if ((local->st_stateid.si_stateownerid == st_id) &&
+			    (local->st_stateid.si_fileid == f_id))
+				return local;
+		}
+	} else
+		printk("NFSD: find_stateid: ERROR: no state flag\n");
+	return NULL;
+}
+
+static struct nfs4_delegation *
+find_delegation_stateid(struct inode *ino, stateid_t *stid)
+{
+	struct nfs4_delegation *dp = NULL;
+	struct nfs4_file *fp = NULL;
+	u32 st_id;
+
+	dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n",
+                    stid->si_boot, stid->si_stateownerid,
+                    stid->si_fileid, stid->si_generation);
+
+	st_id = stid->si_stateownerid;
+	fp = find_file(ino);
+	if (fp) {
+		list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) {
+			if(dp->dl_stateid.si_stateownerid == st_id) {
+				dprintk("NFSD: find_delegation dp %p\n",dp);
+				return dp;
+			}
+		}
+	}
+	return NULL;
+}
+
+/*
+ * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
+ * we can't properly handle lock requests that go beyond the (2^63 - 1)-th
+ * byte, because of sign extension problems.  Since NFSv4 calls for 64-bit
+ * locking, this prevents us from being completely protocol-compliant.  The
+ * real solution to this problem is to start using unsigned file offsets in
+ * the VFS, but this is a very deep change!
+ */
+static inline void
+nfs4_transform_lock_offset(struct file_lock *lock)
+{
+	if (lock->fl_start < 0)
+		lock->fl_start = OFFSET_MAX;
+	if (lock->fl_end < 0)
+		lock->fl_end = OFFSET_MAX;
+}
+
+int
+nfs4_verify_lock_stateowner(struct nfs4_stateowner *sop, unsigned int hashval)
+{
+	struct nfs4_stateowner *local = NULL;
+	int status = 0;
+			        
+	if (hashval >= LOCK_HASH_SIZE)
+		goto out;
+	list_for_each_entry(local, &lock_ownerid_hashtbl[hashval], so_idhash) {
+		if (local == sop) {
+			status = 1;
+			goto out;
+		}
+	}
+out:
+	return status;
+}
+
+
+static inline void
+nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
+{
+	struct nfs4_stateowner *sop = (struct nfs4_stateowner *) fl->fl_owner;
+	unsigned int hval = lockownerid_hashval(sop->so_id);
+
+	deny->ld_sop = NULL;
+	if (nfs4_verify_lock_stateowner(sop, hval)) {
+		kref_get(&sop->so_ref);
+		deny->ld_sop = sop;
+		deny->ld_clientid = sop->so_client->cl_clientid;
+	}
+	deny->ld_start = fl->fl_start;
+	deny->ld_length = ~(u64)0;
+	if (fl->fl_end != ~(u64)0)
+		deny->ld_length = fl->fl_end - fl->fl_start + 1;        
+	deny->ld_type = NFS4_READ_LT;
+	if (fl->fl_type != F_RDLCK)
+		deny->ld_type = NFS4_WRITE_LT;
+}
+
+static struct nfs4_stateowner *
+find_lockstateowner(struct xdr_netobj *owner, clientid_t *clid)
+{
+	struct nfs4_stateowner *local = NULL;
+	int i;
+
+	for (i = 0; i < LOCK_HASH_SIZE; i++) {
+		list_for_each_entry(local, &lock_ownerid_hashtbl[i], so_idhash) {
+			if (!cmp_owner_str(local, owner, clid))
+				continue;
+			return local;
+		}
+	}
+	return NULL;
+}
+
+static struct nfs4_stateowner *
+find_lockstateowner_str(struct inode *inode, clientid_t *clid,
+		struct xdr_netobj *owner)
+{
+	unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner);
+	struct nfs4_stateowner *op;
+
+	list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) {
+		if (cmp_owner_str(op, owner, clid))
+			return op;
+	}
+	return NULL;
+}
+
+/*
+ * Alloc a lock owner structure.
+ * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has 
+ * occured. 
+ *
+ * strhashval = lock_ownerstr_hashval 
+ * so_seqid = lock->lk_new_lock_seqid - 1: it gets bumped in encode 
+ */
+
+static struct nfs4_stateowner *
+alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_stateid *open_stp, struct nfsd4_lock *lock) {
+	struct nfs4_stateowner *sop;
+	struct nfs4_replay *rp;
+	unsigned int idhashval;
+
+	if (!(sop = alloc_stateowner(&lock->lk_new_owner)))
+		return NULL;
+	idhashval = lockownerid_hashval(current_ownerid);
+	INIT_LIST_HEAD(&sop->so_idhash);
+	INIT_LIST_HEAD(&sop->so_strhash);
+	INIT_LIST_HEAD(&sop->so_perclient);
+	INIT_LIST_HEAD(&sop->so_perfilestate);
+	INIT_LIST_HEAD(&sop->so_perlockowner);
+	INIT_LIST_HEAD(&sop->so_close_lru); /* not used */
+	sop->so_time = 0;
+	list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]);
+	list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]);
+	list_add(&sop->so_perlockowner, &open_stp->st_perlockowner);
+	sop->so_is_open_owner = 0;
+	sop->so_id = current_ownerid++;
+	sop->so_client = clp;
+	sop->so_seqid = lock->lk_new_lock_seqid - 1;
+	sop->so_confirmed = 1;
+	rp = &sop->so_replay;
+	rp->rp_status = NFSERR_SERVERFAULT;
+	rp->rp_buflen = 0;
+	rp->rp_buf = rp->rp_ibuf;
+	return sop;
+}
+
+struct nfs4_stateid *
+alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struct nfs4_stateid *open_stp)
+{
+	struct nfs4_stateid *stp;
+	unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);
+
+	if ((stp = kmalloc(sizeof(struct nfs4_stateid), 
+					GFP_KERNEL)) == NULL)
+		goto out;
+	INIT_LIST_HEAD(&stp->st_hash);
+	INIT_LIST_HEAD(&stp->st_perfile);
+	INIT_LIST_HEAD(&stp->st_perfilestate);
+	INIT_LIST_HEAD(&stp->st_perlockowner); /* not used */
+	list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
+	list_add(&stp->st_perfile, &fp->fi_perfile);
+	list_add_perfile++;
+	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
+	stp->st_stateowner = sop;
+	stp->st_file = fp;
+	stp->st_stateid.si_boot = boot_time;
+	stp->st_stateid.si_stateownerid = sop->so_id;
+	stp->st_stateid.si_fileid = fp->fi_id;
+	stp->st_stateid.si_generation = 0;
+	stp->st_vfs_file = open_stp->st_vfs_file; /* FIXME refcount?? */
+	stp->st_access_bmap = open_stp->st_access_bmap;
+	stp->st_deny_bmap = open_stp->st_deny_bmap;
+
+out:
+	return stp;
+}
+
+int
+check_lock_length(u64 offset, u64 length)
+{
+	return ((length == 0)  || ((length != ~(u64)0) &&
+	     LOFF_OVERFLOW(offset, length)));
+}
+
+/*
+ *  LOCK operation 
+ */
+int
+nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock)
+{
+	struct nfs4_stateowner *lock_sop = NULL, *open_sop = NULL;
+	struct nfs4_stateid *lock_stp;
+	struct file *filp;
+	struct file_lock file_lock;
+	struct file_lock *conflock;
+	int status = 0;
+	unsigned int strhashval;
+
+	dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
+		(long long) lock->lk_offset,
+		(long long) lock->lk_length);
+
+	if (nfs4_in_grace() && !lock->lk_reclaim)
+		return nfserr_grace;
+	if (!nfs4_in_grace() && lock->lk_reclaim)
+		return nfserr_no_grace;
+
+	if (check_lock_length(lock->lk_offset, lock->lk_length))
+		 return nfserr_inval;
+
+	nfs4_lock_state();
+
+	if (lock->lk_is_new) {
+	/*
+	 * Client indicates that this is a new lockowner.
+	 * Use open owner and open stateid to create lock owner and lock 
+	 * stateid.
+	 */
+		struct nfs4_stateid *open_stp = NULL;
+		struct nfs4_file *fp;
+		
+		status = nfserr_stale_clientid;
+		if (STALE_CLIENTID(&lock->lk_new_clientid)) {
+			printk("NFSD: nfsd4_lock: clientid is stale!\n");
+			goto out;
+		}
+
+		/* is the new lock seqid presented by the client zero? */
+		status = nfserr_bad_seqid;
+		if (lock->v.new.lock_seqid != 0)
+			goto out;
+
+		/* validate and update open stateid and open seqid */
+		status = nfs4_preprocess_seqid_op(current_fh, 
+				        lock->lk_new_open_seqid,
+		                        &lock->lk_new_open_stateid,
+		                        CHECK_FH | OPEN_STATE,
+		                        &open_sop, &open_stp,
+					&lock->v.new.clientid);
+		if (status) {
+			if (lock->lk_reclaim)
+				status = nfserr_reclaim_bad;
+			goto out;
+		}
+		/* create lockowner and lock stateid */
+		fp = open_stp->st_file;
+		strhashval = lock_ownerstr_hashval(fp->fi_inode, 
+				open_sop->so_client->cl_clientid.cl_id, 
+				&lock->v.new.owner);
+		/* 
+		 * If we already have this lock owner, the client is in 
+		 * error (or our bookeeping is wrong!) 
+		 * for asking for a 'new lock'.
+		 */
+		status = nfserr_bad_stateid;
+		lock_sop = find_lockstateowner(&lock->v.new.owner,
+						&lock->v.new.clientid);
+		if (lock_sop)
+			goto out;
+		status = nfserr_resource;
+		if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, open_sop->so_client, open_stp, lock)))
+			goto out;
+		if ((lock_stp = alloc_init_lock_stateid(lock->lk_stateowner, 
+						fp, open_stp)) == NULL) {
+			release_stateowner(lock->lk_stateowner);
+			lock->lk_stateowner = NULL;
+			goto out;
+		}
+		/* bump the open seqid used to create the lock */
+		open_sop->so_seqid++;
+	} else {
+		/* lock (lock owner + lock stateid) already exists */
+		status = nfs4_preprocess_seqid_op(current_fh,
+				       lock->lk_old_lock_seqid, 
+				       &lock->lk_old_lock_stateid, 
+				       CHECK_FH | LOCK_STATE, 
+				       &lock->lk_stateowner, &lock_stp, NULL);
+		if (status)
+			goto out;
+	}
+	/* lock->lk_stateowner and lock_stp have been created or found */
+	filp = lock_stp->st_vfs_file;
+
+	if ((status = fh_verify(rqstp, current_fh, S_IFREG, MAY_LOCK))) {
+		printk("NFSD: nfsd4_lock: permission denied!\n");
+		goto out;
+	}
+
+	locks_init_lock(&file_lock);
+	switch (lock->lk_type) {
+		case NFS4_READ_LT:
+		case NFS4_READW_LT:
+			file_lock.fl_type = F_RDLCK;
+		break;
+		case NFS4_WRITE_LT:
+		case NFS4_WRITEW_LT:
+			file_lock.fl_type = F_WRLCK;
+		break;
+		default:
+			status = nfserr_inval;
+		goto out;
+	}
+	file_lock.fl_owner = (fl_owner_t) lock->lk_stateowner;
+	file_lock.fl_pid = current->tgid;
+	file_lock.fl_file = filp;
+	file_lock.fl_flags = FL_POSIX;
+
+	file_lock.fl_start = lock->lk_offset;
+	if ((lock->lk_length == ~(u64)0) || 
+			LOFF_OVERFLOW(lock->lk_offset, lock->lk_length))
+		file_lock.fl_end = ~(u64)0;
+	else
+		file_lock.fl_end = lock->lk_offset + lock->lk_length - 1;
+	nfs4_transform_lock_offset(&file_lock);
+
+	/*
+	* Try to lock the file in the VFS.
+	* Note: locks.c uses the BKL to protect the inode's lock list.
+	*/
+
+	status = posix_lock_file(filp, &file_lock);
+	if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private)
+		file_lock.fl_ops->fl_release_private(&file_lock);
+	dprintk("NFSD: nfsd4_lock: posix_lock_file status %d\n",status);
+	switch (-status) {
+	case 0: /* success! */
+		update_stateid(&lock_stp->st_stateid);
+		memcpy(&lock->lk_resp_stateid, &lock_stp->st_stateid, 
+				sizeof(stateid_t));
+		goto out;
+	case (EAGAIN):
+		goto conflicting_lock;
+	case (EDEADLK):
+		status = nfserr_deadlock;
+	default:        
+		dprintk("NFSD: nfsd4_lock: posix_lock_file() failed! status %d\n",status);
+		goto out_destroy_new_stateid;
+	}
+
+conflicting_lock:
+	dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
+	status = nfserr_denied;
+	/* XXX There is a race here. Future patch needed to provide 
+	 * an atomic posix_lock_and_test_file
+	 */
+	if (!(conflock = posix_test_lock(filp, &file_lock))) {
+		status = nfserr_serverfault;
+		goto out;
+	}
+	nfs4_set_lock_denied(conflock, &lock->lk_denied);
+
+out_destroy_new_stateid:
+	if (lock->lk_is_new) {
+		dprintk("NFSD: nfsd4_lock: destroy new stateid!\n");
+	/*
+	* An error encountered after instantiation of the new
+	* stateid has forced us to destroy it.
+	*/
+		if (!seqid_mutating_err(status))
+			open_sop->so_seqid--;
+
+		release_state_owner(lock_stp, LOCK_STATE);
+	}
+out:
+	if (lock->lk_stateowner)
+		nfs4_get_stateowner(lock->lk_stateowner);
+	nfs4_unlock_state();
+	return status;
+}
+
+/*
+ * LOCKT operation
+ */
+int
+nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lockt *lockt)
+{
+	struct inode *inode;
+	struct file file;
+	struct file_lock file_lock;
+	struct file_lock *conflicting_lock;
+	int status;
+
+	if (nfs4_in_grace())
+		return nfserr_grace;
+
+	if (check_lock_length(lockt->lt_offset, lockt->lt_length))
+		 return nfserr_inval;
+
+	lockt->lt_stateowner = NULL;
+	nfs4_lock_state();
+
+	status = nfserr_stale_clientid;
+	if (STALE_CLIENTID(&lockt->lt_clientid)) {
+		printk("NFSD: nfsd4_lockt: clientid is stale!\n");
+		goto out;
+	}
+
+	if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0))) {
+		printk("NFSD: nfsd4_lockt: fh_verify() failed!\n");
+		if (status == nfserr_symlink)
+			status = nfserr_inval;
+		goto out;
+	}
+
+	inode = current_fh->fh_dentry->d_inode;
+	locks_init_lock(&file_lock);
+	switch (lockt->lt_type) {
+		case NFS4_READ_LT:
+		case NFS4_READW_LT:
+			file_lock.fl_type = F_RDLCK;
+		break;
+		case NFS4_WRITE_LT:
+		case NFS4_WRITEW_LT:
+			file_lock.fl_type = F_WRLCK;
+		break;
+		default:
+			printk("NFSD: nfs4_lockt: bad lock type!\n");
+			status = nfserr_inval;
+		goto out;
+	}
+
+	lockt->lt_stateowner = find_lockstateowner_str(inode,
+			&lockt->lt_clientid, &lockt->lt_owner);
+	if (lockt->lt_stateowner)
+		file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner;
+	file_lock.fl_pid = current->tgid;
+	file_lock.fl_flags = FL_POSIX;
+
+	file_lock.fl_start = lockt->lt_offset;
+	if ((lockt->lt_length == ~(u64)0) || LOFF_OVERFLOW(lockt->lt_offset, lockt->lt_length))
+		file_lock.fl_end = ~(u64)0;
+	else
+		file_lock.fl_end = lockt->lt_offset + lockt->lt_length - 1;
+
+	nfs4_transform_lock_offset(&file_lock);
+
+	/* posix_test_lock uses the struct file _only_ to resolve the inode.
+	 * since LOCKT doesn't require an OPEN, and therefore a struct
+	 * file may not exist, pass posix_test_lock a struct file with
+	 * only the dentry:inode set.
+	 */
+	memset(&file, 0, sizeof (struct file));
+	file.f_dentry = current_fh->fh_dentry;
+
+	status = nfs_ok;
+	conflicting_lock = posix_test_lock(&file, &file_lock);
+	if (conflicting_lock) {
+		status = nfserr_denied;
+		nfs4_set_lock_denied(conflicting_lock, &lockt->lt_denied);
+	}
+out:
+	nfs4_unlock_state();
+	return status;
+}
+
+int
+nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_locku *locku)
+{
+	struct nfs4_stateid *stp;
+	struct file *filp = NULL;
+	struct file_lock file_lock;
+	int status;
+						        
+	dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n",
+		(long long) locku->lu_offset,
+		(long long) locku->lu_length);
+
+	if (check_lock_length(locku->lu_offset, locku->lu_length))
+		 return nfserr_inval;
+
+	nfs4_lock_state();
+									        
+	if ((status = nfs4_preprocess_seqid_op(current_fh, 
+					locku->lu_seqid, 
+					&locku->lu_stateid, 
+					CHECK_FH | LOCK_STATE, 
+					&locku->lu_stateowner, &stp, NULL)))
+		goto out;
+
+	filp = stp->st_vfs_file;
+	BUG_ON(!filp);
+	locks_init_lock(&file_lock);
+	file_lock.fl_type = F_UNLCK;
+	file_lock.fl_owner = (fl_owner_t) locku->lu_stateowner;
+	file_lock.fl_pid = current->tgid;
+	file_lock.fl_file = filp;
+	file_lock.fl_flags = FL_POSIX; 
+	file_lock.fl_start = locku->lu_offset;
+
+	if ((locku->lu_length == ~(u64)0) || LOFF_OVERFLOW(locku->lu_offset, locku->lu_length))
+		file_lock.fl_end = ~(u64)0;
+	else
+		file_lock.fl_end = locku->lu_offset + locku->lu_length - 1;
+	nfs4_transform_lock_offset(&file_lock);
+
+	/*
+	*  Try to unlock the file in the VFS.
+	*/
+	status = posix_lock_file(filp, &file_lock); 
+	if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private)
+		file_lock.fl_ops->fl_release_private(&file_lock);
+	if (status) {
+		printk("NFSD: nfs4_locku: posix_lock_file failed!\n");
+		goto out_nfserr;
+	}
+	/*
+	* OK, unlock succeeded; the only thing left to do is update the stateid.
+	*/
+	update_stateid(&stp->st_stateid);
+	memcpy(&locku->lu_stateid, &stp->st_stateid, sizeof(stateid_t));
+
+out:
+	if (locku->lu_stateowner)
+		nfs4_get_stateowner(locku->lu_stateowner);
+	nfs4_unlock_state();
+	return status;
+
+out_nfserr:
+	status = nfserrno(status);
+	goto out;
+}
+
+/*
+ * returns
+ * 	1: locks held by lockowner
+ * 	0: no locks held by lockowner
+ */
+static int
+check_for_locks(struct file *filp, struct nfs4_stateowner *lowner)
+{
+	struct file_lock **flpp;
+	struct inode *inode = filp->f_dentry->d_inode;
+	int status = 0;
+
+	lock_kernel();
+	for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) {
+		if ((*flpp)->fl_owner == (fl_owner_t)lowner)
+			status = 1;
+			goto out;
+	}
+out:
+	unlock_kernel();
+	return status;
+}
+
+int
+nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner)
+{
+	clientid_t *clid = &rlockowner->rl_clientid;
+	struct nfs4_stateowner *local = NULL;
+	struct xdr_netobj *owner = &rlockowner->rl_owner;
+	int status;
+
+	dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
+		clid->cl_boot, clid->cl_id);
+
+	/* XXX check for lease expiration */
+
+	status = nfserr_stale_clientid;
+	if (STALE_CLIENTID(clid)) {
+		printk("NFSD: nfsd4_release_lockowner: clientid is stale!\n");
+		return status;
+	}
+
+	nfs4_lock_state();
+
+	status = nfs_ok;
+	local = find_lockstateowner(owner, clid);
+	if (local) {
+		struct nfs4_stateid *stp;
+
+		/* check for any locks held by any stateid
+		 * associated with the (lock) stateowner */
+		status = nfserr_locks_held;
+		list_for_each_entry(stp, &local->so_perfilestate,
+				st_perfilestate) {
+			if (check_for_locks(stp->st_vfs_file, local))
+				goto out;
+		}
+		/* no locks held by (lock) stateowner */
+		status = nfs_ok;
+		release_stateowner(local);
+	}
+out:
+	nfs4_unlock_state();
+	return status;
+}
+
+static inline struct nfs4_client_reclaim *
+alloc_reclaim(int namelen)
+{
+	struct nfs4_client_reclaim *crp = NULL;
+
+	crp = kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
+	if (!crp)
+		return NULL;
+	crp->cr_name.data = kmalloc(namelen, GFP_KERNEL);
+	if (!crp->cr_name.data) {
+		kfree(crp);
+		return NULL;
+	}
+	return crp;
+}
+
+/*
+ * failure => all reset bets are off, nfserr_no_grace...
+ */
+static int
+nfs4_client_to_reclaim(char *name, int namlen)
+{
+	unsigned int strhashval;
+	struct nfs4_client_reclaim *crp = NULL;
+
+	dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", namlen, name);
+	crp = alloc_reclaim(namlen);
+	if (!crp)
+		return 0;
+	strhashval = clientstr_hashval(name, namlen);
+	INIT_LIST_HEAD(&crp->cr_strhash);
+	list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]);
+	memcpy(crp->cr_name.data, name, namlen);
+	crp->cr_name.len = namlen;
+	reclaim_str_hashtbl_size++;
+	return 1;
+}
+
+static void
+nfs4_release_reclaim(void)
+{
+	struct nfs4_client_reclaim *crp = NULL;
+	int i;
+
+	BUG_ON(!nfs4_reclaim_init);
+	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
+		while (!list_empty(&reclaim_str_hashtbl[i])) {
+			crp = list_entry(reclaim_str_hashtbl[i].next,
+			                struct nfs4_client_reclaim, cr_strhash);
+			list_del(&crp->cr_strhash);
+			kfree(crp->cr_name.data);
+			kfree(crp);
+			reclaim_str_hashtbl_size--;
+		}
+	}
+	BUG_ON(reclaim_str_hashtbl_size);
+}
+
+/*
+ * called from OPEN, CLAIM_PREVIOUS with a new clientid. */
+struct nfs4_client_reclaim *
+nfs4_find_reclaim_client(clientid_t *clid)
+{
+	unsigned int strhashval;
+	struct nfs4_client *clp;
+	struct nfs4_client_reclaim *crp = NULL;
+
+
+	/* find clientid in conf_id_hashtbl */
+	clp = find_confirmed_client(clid);
+	if (clp == NULL)
+		return NULL;
+
+	dprintk("NFSD: nfs4_find_reclaim_client for %.*s\n",
+		            clp->cl_name.len, clp->cl_name.data);
+
+	/* find clp->cl_name in reclaim_str_hashtbl */
+	strhashval = clientstr_hashval(clp->cl_name.data, clp->cl_name.len);
+	list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) {
+		if (cmp_name(&crp->cr_name, &clp->cl_name)) {
+			return crp;
+		}
+	}
+	return NULL;
+}
+
+/*
+* Called from OPEN. Look for clientid in reclaim list.
+*/
+int
+nfs4_check_open_reclaim(clientid_t *clid)
+{
+	struct nfs4_client_reclaim *crp;
+
+	if ((crp = nfs4_find_reclaim_client(clid)) == NULL)
+		return nfserr_reclaim_bad;
+	return nfs_ok;
+}
+
+
+/* 
+ * Start and stop routines
+ */
+
+static void
+__nfs4_state_init(void)
+{
+	int i;
+	time_t grace_time;
+
+	if (!nfs4_reclaim_init) {
+		for (i = 0; i < CLIENT_HASH_SIZE; i++)
+			INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
+		reclaim_str_hashtbl_size = 0;
+		nfs4_reclaim_init = 1;
+	}
+	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
+		INIT_LIST_HEAD(&conf_id_hashtbl[i]);
+		INIT_LIST_HEAD(&conf_str_hashtbl[i]);
+		INIT_LIST_HEAD(&unconf_str_hashtbl[i]);
+		INIT_LIST_HEAD(&unconf_id_hashtbl[i]);
+	}
+	for (i = 0; i < FILE_HASH_SIZE; i++) {
+		INIT_LIST_HEAD(&file_hashtbl[i]);
+	}
+	for (i = 0; i < OWNER_HASH_SIZE; i++) {
+		INIT_LIST_HEAD(&ownerstr_hashtbl[i]);
+		INIT_LIST_HEAD(&ownerid_hashtbl[i]);
+	}
+	for (i = 0; i < STATEID_HASH_SIZE; i++) {
+		INIT_LIST_HEAD(&stateid_hashtbl[i]);
+		INIT_LIST_HEAD(&lockstateid_hashtbl[i]);
+	}
+	for (i = 0; i < LOCK_HASH_SIZE; i++) {
+		INIT_LIST_HEAD(&lock_ownerid_hashtbl[i]);
+		INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]);
+	}
+	memset(&zerostateid, 0, sizeof(stateid_t));
+	memset(&onestateid, ~0, sizeof(stateid_t));
+
+	INIT_LIST_HEAD(&close_lru);
+	INIT_LIST_HEAD(&client_lru);
+	INIT_LIST_HEAD(&del_recall_lru);
+	spin_lock_init(&recall_lock);
+	boot_time = get_seconds();
+	grace_time = max(old_lease_time, lease_time);
+	if (reclaim_str_hashtbl_size == 0)
+		grace_time = 0;
+	if (grace_time)
+		printk("NFSD: starting %ld-second grace period\n", grace_time);
+	grace_end = boot_time + grace_time;
+	INIT_WORK(&laundromat_work,laundromat_main, NULL);
+	schedule_delayed_work(&laundromat_work, NFSD_LEASE_TIME*HZ);
+}
+
+int
+nfs4_state_init(void)
+{
+	int status;
+
+	if (nfs4_init)
+		return 0;
+	status = nfsd4_init_slabs();
+	if (status)
+		return status;
+	__nfs4_state_init();
+	nfs4_init = 1;
+	return 0;
+}
+
+int
+nfs4_in_grace(void)
+{
+	return get_seconds() < grace_end;
+}
+
+void
+set_no_grace(void)
+{
+	printk("NFSD: ERROR in reboot recovery.  State reclaims will fail.\n");
+	grace_end = get_seconds();
+}
+
+time_t
+nfs4_lease_time(void)
+{
+	return lease_time;
+}
+
+static void
+__nfs4_state_shutdown(void)
+{
+	int i;
+	struct nfs4_client *clp = NULL;
+	struct nfs4_delegation *dp = NULL;
+	struct nfs4_stateowner *sop = NULL;
+	struct list_head *pos, *next, reaplist;
+
+	list_for_each_safe(pos, next, &close_lru) {
+		sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
+		list_del(&sop->so_close_lru);
+		nfs4_put_stateowner(sop);
+	}
+
+	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
+		while (!list_empty(&conf_id_hashtbl[i])) {
+			clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
+			expire_client(clp);
+		}
+		while (!list_empty(&unconf_str_hashtbl[i])) {
+			clp = list_entry(unconf_str_hashtbl[i].next, struct nfs4_client, cl_strhash);
+			expire_client(clp);
+		}
+	}
+	INIT_LIST_HEAD(&reaplist);
+	spin_lock(&recall_lock);
+	list_for_each_safe(pos, next, &del_recall_lru) {
+		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+		list_move(&dp->dl_recall_lru, &reaplist);
+	}
+	spin_unlock(&recall_lock);
+	list_for_each_safe(pos, next, &reaplist) {
+		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+		list_del_init(&dp->dl_recall_lru);
+		unhash_delegation(dp);
+	}
+
+	release_all_files();
+	cancel_delayed_work(&laundromat_work);
+	flush_scheduled_work();
+	nfs4_init = 0;
+	dprintk("NFSD: list_add_perfile %d list_del_perfile %d\n",
+			list_add_perfile, list_del_perfile);
+	dprintk("NFSD: add_perclient %d del_perclient %d\n",
+			add_perclient, del_perclient);
+	dprintk("NFSD: alloc_file %d free_file %d\n",
+			alloc_file, free_file);
+	dprintk("NFSD: vfsopen %d vfsclose %d\n",
+			vfsopen, vfsclose);
+	dprintk("NFSD: alloc_delegation %d free_delegation %d\n",
+			alloc_delegation, free_delegation);
+
+}
+
+void
+nfs4_state_shutdown(void)
+{
+	nfs4_lock_state();
+	nfs4_release_reclaim();
+	__nfs4_state_shutdown();
+	nfsd4_free_slabs();
+	nfs4_unlock_state();
+}
+
+/*
+ * Called when leasetime is changed.
+ *
+ * if nfsd is not started, simply set the global lease.
+ *
+ * if nfsd(s) are running, lease change requires nfsv4 state to be reset.
+ * e.g: boot_time is reset, existing nfs4_client structs are
+ * used to fill reclaim_str_hashtbl, then all state (except for the
+ * reclaim_str_hashtbl) is re-initialized.
+ *
+ * if the old lease time is greater than the new lease time, the grace
+ * period needs to be set to the old lease time to allow clients to reclaim
+ * their state. XXX - we may want to set the grace period == lease time
+ * after an initial grace period == old lease time
+ *
+ * if an error occurs in this process, the new lease is set, but the server
+ * will not honor OPEN or LOCK reclaims, and will return nfserr_no_grace
+ * which means OPEN/LOCK/READ/WRITE will fail during grace period.
+ *
+ * clients will attempt to reset all state with SETCLIENTID/CONFIRM, and
+ * OPEN and LOCK reclaims.
+ */
+void
+nfs4_reset_lease(time_t leasetime)
+{
+	struct nfs4_client *clp;
+	int i;
+
+	printk("NFSD: New leasetime %ld\n",leasetime);
+	if (!nfs4_init)
+		return;
+	nfs4_lock_state();
+	old_lease_time = lease_time;
+	lease_time = leasetime;
+
+	nfs4_release_reclaim();
+
+	/* populate reclaim_str_hashtbl with current confirmed nfs4_clientid */
+	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
+		list_for_each_entry(clp, &conf_id_hashtbl[i], cl_idhash) {
+			if (!nfs4_client_to_reclaim(clp->cl_name.data,
+						clp->cl_name.len)) {
+				nfs4_release_reclaim();
+				goto init_state;
+			}
+		}
+	}
+init_state:
+	__nfs4_state_shutdown();
+	__nfs4_state_init();
+	nfs4_unlock_state();
+}
+
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
new file mode 100644
index 000000000000..36a058a112d5
--- /dev/null
+++ b/fs/nfsd/nfs4xdr.c
@@ -0,0 +1,2536 @@
+/*
+ *  fs/nfs/nfs4xdr.c
+ *
+ *  Server-side XDR for NFSv4
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *  Andy Adamson   <andros@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * TODO: Neil Brown made the following observation:  We currently
+ * initially reserve NFSD_BUFSIZE space on the transmit queue and
+ * never release any of that until the request is complete.
+ * It would be good to calculate a new maximum response size while
+ * decoding the COMPOUND, and call svc_reserve with this number
+ * at the end of nfs4svc_decode_compoundargs.
+ */
+
+#include <linux/param.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/vfs.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/state.h>
+#include <linux/nfsd/xdr4.h>
+#include <linux/nfsd_idmap.h>
+#include <linux/nfs4.h>
+#include <linux/nfs4_acl.h>
+
+#define NFSDDBG_FACILITY		NFSDDBG_XDR
+
+static int
+check_filename(char *str, int len, int err)
+{
+	int i;
+
+	if (len == 0)
+		return nfserr_inval;
+	if (isdotent(str, len))
+		return err;
+	for (i = 0; i < len; i++)
+		if (str[i] == '/')
+			return err;
+	return 0;
+}
+
+/*
+ * START OF "GENERIC" DECODE ROUTINES.
+ *   These may look a little ugly since they are imported from a "generic"
+ * set of XDR encode/decode routines which are intended to be shared by
+ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
+ *
+ * If the pain of reading these is too great, it should be a straightforward
+ * task to translate them into Linux-specific versions which are more
+ * consistent with the style used in NFSv2/v3...
+ */
+#define DECODE_HEAD				\
+	u32 *p;					\
+	int status
+#define DECODE_TAIL				\
+	status = 0;				\
+out:						\
+	return status;				\
+xdr_error:					\
+	printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__);	\
+	status = nfserr_bad_xdr;		\
+	goto out
+
+#define READ32(x)         (x) = ntohl(*p++)
+#define READ64(x)         do {			\
+	(x) = (u64)ntohl(*p++) << 32;		\
+	(x) |= ntohl(*p++);			\
+} while (0)
+#define READTIME(x)       do {			\
+	p++;					\
+	(x) = ntohl(*p++);			\
+	p++;					\
+} while (0)
+#define READMEM(x,nbytes) do {			\
+	x = (char *)p;				\
+	p += XDR_QUADLEN(nbytes);		\
+} while (0)
+#define SAVEMEM(x,nbytes) do {			\
+	if (!(x = (p==argp->tmp || p == argp->tmpp) ? \
+ 		savemem(argp, p, nbytes) :	\
+ 		(char *)p)) {			\
+		printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \
+		goto xdr_error;			\
+		}				\
+	p += XDR_QUADLEN(nbytes);		\
+} while (0)
+#define COPYMEM(x,nbytes) do {			\
+	memcpy((x), p, nbytes);			\
+	p += XDR_QUADLEN(nbytes);		\
+} while (0)
+
+/* READ_BUF, read_buf(): nbytes must be <= PAGE_SIZE */
+#define READ_BUF(nbytes)  do {			\
+	if (nbytes <= (u32)((char *)argp->end - (char *)argp->p)) {	\
+		p = argp->p;			\
+		argp->p += XDR_QUADLEN(nbytes);	\
+	} else if (!(p = read_buf(argp, nbytes))) { \
+		printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \
+		goto xdr_error;			\
+	}					\
+} while (0)
+
+u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
+{
+	/* We want more bytes than seem to be available.
+	 * Maybe we need a new page, maybe we have just run out
+	 */
+	int avail = (char*)argp->end - (char*)argp->p;
+	u32 *p;
+	if (avail + argp->pagelen < nbytes)
+		return NULL;
+	if (avail + PAGE_SIZE < nbytes) /* need more than a page !! */
+		return NULL;
+	/* ok, we can do it with the current plus the next page */
+	if (nbytes <= sizeof(argp->tmp))
+		p = argp->tmp;
+	else {
+		if (argp->tmpp)
+			kfree(argp->tmpp);
+		p = argp->tmpp = kmalloc(nbytes, GFP_KERNEL);
+		if (!p)
+			return NULL;
+		
+	}
+	memcpy(p, argp->p, avail);
+	/* step to next page */
+	argp->p = page_address(argp->pagelist[0]);
+	argp->pagelist++;
+	if (argp->pagelen < PAGE_SIZE) {
+		argp->end = p + (argp->pagelen>>2);
+		argp->pagelen = 0;
+	} else {
+		argp->end = p + (PAGE_SIZE>>2);
+		argp->pagelen -= PAGE_SIZE;
+	}
+	memcpy(((char*)p)+avail, argp->p, (nbytes - avail));
+	argp->p += XDR_QUADLEN(nbytes - avail);
+	return p;
+}
+
+static int
+defer_free(struct nfsd4_compoundargs *argp,
+		void (*release)(const void *), void *p)
+{
+	struct tmpbuf *tb;
+
+	tb = kmalloc(sizeof(*tb), GFP_KERNEL);
+	if (!tb)
+		return -ENOMEM;
+	tb->buf = p;
+	tb->release = release;
+	tb->next = argp->to_free;
+	argp->to_free = tb;
+	return 0;
+}
+
+char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes)
+{
+	void *new = NULL;
+	if (p == argp->tmp) {
+		new = kmalloc(nbytes, GFP_KERNEL);
+		if (!new) return NULL;
+		p = new;
+		memcpy(p, argp->tmp, nbytes);
+	} else {
+		if (p != argp->tmpp)
+			BUG();
+		argp->tmpp = NULL;
+	}
+	if (defer_free(argp, kfree, p)) {
+		kfree(new);
+		return NULL;
+	} else
+		return (char *)p;
+}
+
+
+static int
+nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
+{
+	u32 bmlen;
+	DECODE_HEAD;
+
+	bmval[0] = 0;
+	bmval[1] = 0;
+
+	READ_BUF(4);
+	READ32(bmlen);
+	if (bmlen > 1000)
+		goto xdr_error;
+
+	READ_BUF(bmlen << 2);
+	if (bmlen > 0)
+		READ32(bmval[0]);
+	if (bmlen > 1)
+		READ32(bmval[1]);
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr,
+    struct nfs4_acl **acl)
+{
+	int expected_len, len = 0;
+	u32 dummy32;
+	char *buf;
+
+	DECODE_HEAD;
+	iattr->ia_valid = 0;
+	if ((status = nfsd4_decode_bitmap(argp, bmval)))
+		return status;
+
+	/*
+	 * According to spec, unsupported attributes return ERR_NOTSUPP;
+	 * read-only attributes return ERR_INVAL.
+	 */
+	if ((bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) || (bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1))
+		return nfserr_attrnotsupp;
+	if ((bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0) || (bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1))
+		return nfserr_inval;
+
+	READ_BUF(4);
+	READ32(expected_len);
+
+	if (bmval[0] & FATTR4_WORD0_SIZE) {
+		READ_BUF(8);
+		len += 8;
+		READ64(iattr->ia_size);
+		iattr->ia_valid |= ATTR_SIZE;
+	}
+	if (bmval[0] & FATTR4_WORD0_ACL) {
+		int nace, i;
+		struct nfs4_ace ace;
+
+		READ_BUF(4); len += 4;
+		READ32(nace);
+
+		*acl = nfs4_acl_new();
+		if (*acl == NULL) {
+			status = -ENOMEM;
+			goto out_nfserr;
+		}
+		defer_free(argp, (void (*)(const void *))nfs4_acl_free, *acl);
+
+		for (i = 0; i < nace; i++) {
+			READ_BUF(16); len += 16;
+			READ32(ace.type);
+			READ32(ace.flag);
+			READ32(ace.access_mask);
+			READ32(dummy32);
+			READ_BUF(dummy32);
+			len += XDR_QUADLEN(dummy32) << 2;
+			READMEM(buf, dummy32);
+			ace.whotype = nfs4_acl_get_whotype(buf, dummy32);
+			status = 0;
+			if (ace.whotype != NFS4_ACL_WHO_NAMED)
+				ace.who = 0;
+			else if (ace.flag & NFS4_ACE_IDENTIFIER_GROUP)
+				status = nfsd_map_name_to_gid(argp->rqstp,
+						buf, dummy32, &ace.who);
+			else
+				status = nfsd_map_name_to_uid(argp->rqstp,
+						buf, dummy32, &ace.who);
+			if (status)
+				goto out_nfserr;
+			if (nfs4_acl_add_ace(*acl, ace.type, ace.flag,
+				 ace.access_mask, ace.whotype, ace.who) != 0) {
+				status = -ENOMEM;
+				goto out_nfserr;
+			}
+		}
+	} else
+		*acl = NULL;
+	if (bmval[1] & FATTR4_WORD1_MODE) {
+		READ_BUF(4);
+		len += 4;
+		READ32(iattr->ia_mode);
+		iattr->ia_mode &= (S_IFMT | S_IALLUGO);
+		iattr->ia_valid |= ATTR_MODE;
+	}
+	if (bmval[1] & FATTR4_WORD1_OWNER) {
+		READ_BUF(4);
+		len += 4;
+		READ32(dummy32);
+		READ_BUF(dummy32);
+		len += (XDR_QUADLEN(dummy32) << 2);
+		READMEM(buf, dummy32);
+		if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
+			goto out_nfserr;
+		iattr->ia_valid |= ATTR_UID;
+	}
+	if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) {
+		READ_BUF(4);
+		len += 4;
+		READ32(dummy32);
+		READ_BUF(dummy32);
+		len += (XDR_QUADLEN(dummy32) << 2);
+		READMEM(buf, dummy32);
+		if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
+			goto out_nfserr;
+		iattr->ia_valid |= ATTR_GID;
+	}
+	if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
+		READ_BUF(4);
+		len += 4;
+		READ32(dummy32);
+		switch (dummy32) {
+		case NFS4_SET_TO_CLIENT_TIME:
+			/* We require the high 32 bits of 'seconds' to be 0, and we ignore
+			   all 32 bits of 'nseconds'. */
+			READ_BUF(12);
+			len += 12;
+			READ32(dummy32);
+			if (dummy32)
+				return nfserr_inval;
+			READ32(iattr->ia_atime.tv_sec);
+			READ32(iattr->ia_atime.tv_nsec);
+			if (iattr->ia_atime.tv_nsec >= (u32)1000000000)
+				return nfserr_inval;
+			iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET);
+			break;
+		case NFS4_SET_TO_SERVER_TIME:
+			iattr->ia_valid |= ATTR_ATIME;
+			break;
+		default:
+			goto xdr_error;
+		}
+	}
+	if (bmval[1] & FATTR4_WORD1_TIME_METADATA) {
+		/* We require the high 32 bits of 'seconds' to be 0, and we ignore
+		   all 32 bits of 'nseconds'. */
+		READ_BUF(12);
+		len += 12;
+		READ32(dummy32);
+		if (dummy32)
+			return nfserr_inval;
+		READ32(iattr->ia_ctime.tv_sec);
+		READ32(iattr->ia_ctime.tv_nsec);
+		if (iattr->ia_ctime.tv_nsec >= (u32)1000000000)
+			return nfserr_inval;
+		iattr->ia_valid |= ATTR_CTIME;
+	}
+	if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
+		READ_BUF(4);
+		len += 4;
+		READ32(dummy32);
+		switch (dummy32) {
+		case NFS4_SET_TO_CLIENT_TIME:
+			/* We require the high 32 bits of 'seconds' to be 0, and we ignore
+			   all 32 bits of 'nseconds'. */
+			READ_BUF(12);
+			len += 12;
+			READ32(dummy32);
+			if (dummy32)
+				return nfserr_inval;
+			READ32(iattr->ia_mtime.tv_sec);
+			READ32(iattr->ia_mtime.tv_nsec);
+			if (iattr->ia_mtime.tv_nsec >= (u32)1000000000)
+				return nfserr_inval;
+			iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET);
+			break;
+		case NFS4_SET_TO_SERVER_TIME:
+			iattr->ia_valid |= ATTR_MTIME;
+			break;
+		default:
+			goto xdr_error;
+		}
+	}
+	if (len != expected_len)
+		goto xdr_error;
+
+	DECODE_TAIL;
+
+out_nfserr:
+	status = nfserrno(status);
+	goto out;
+}
+
+static int
+nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access)
+{
+	DECODE_HEAD;
+
+	READ_BUF(4);
+	READ32(access->ac_req_access);
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)
+{
+	DECODE_HEAD;
+
+	close->cl_stateowner = NULL;
+	READ_BUF(4 + sizeof(stateid_t));
+	READ32(close->cl_seqid);
+	READ32(close->cl_stateid.si_generation);
+	COPYMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t));
+
+	DECODE_TAIL;
+}
+
+
+static int
+nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit)
+{
+	DECODE_HEAD;
+
+	READ_BUF(12);
+	READ64(commit->co_offset);
+	READ32(commit->co_count);
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create)
+{
+	DECODE_HEAD;
+
+	READ_BUF(4);
+	READ32(create->cr_type);
+	switch (create->cr_type) {
+	case NF4LNK:
+		READ_BUF(4);
+		READ32(create->cr_linklen);
+		READ_BUF(create->cr_linklen);
+		SAVEMEM(create->cr_linkname, create->cr_linklen);
+		break;
+	case NF4BLK:
+	case NF4CHR:
+		READ_BUF(8);
+		READ32(create->cr_specdata1);
+		READ32(create->cr_specdata2);
+		break;
+	case NF4SOCK:
+	case NF4FIFO:
+	case NF4DIR:
+	default:
+		break;
+	}
+
+	READ_BUF(4);
+	READ32(create->cr_namelen);
+	READ_BUF(create->cr_namelen);
+	SAVEMEM(create->cr_name, create->cr_namelen);
+	if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval)))
+		return status;
+
+	if ((status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, &create->cr_acl)))
+		goto out;
+
+	DECODE_TAIL;
+}
+
+static inline int
+nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr)
+{
+	DECODE_HEAD;
+
+	READ_BUF(sizeof(stateid_t));
+	READ32(dr->dr_stateid.si_generation);
+	COPYMEM(&dr->dr_stateid.si_opaque, sizeof(stateid_opaque_t));
+
+	DECODE_TAIL;
+}
+
+static inline int
+nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr)
+{
+	return nfsd4_decode_bitmap(argp, getattr->ga_bmval);
+}
+
+static int
+nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link)
+{
+	DECODE_HEAD;
+
+	READ_BUF(4);
+	READ32(link->li_namelen);
+	READ_BUF(link->li_namelen);
+	SAVEMEM(link->li_name, link->li_namelen);
+	if ((status = check_filename(link->li_name, link->li_namelen, nfserr_inval)))
+		return status;
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
+{
+	DECODE_HEAD;
+
+	lock->lk_stateowner = NULL;
+	/*
+	* type, reclaim(boolean), offset, length, new_lock_owner(boolean)
+	*/
+	READ_BUF(28);
+	READ32(lock->lk_type);
+	if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT))
+		goto xdr_error;
+	READ32(lock->lk_reclaim);
+	READ64(lock->lk_offset);
+	READ64(lock->lk_length);
+	READ32(lock->lk_is_new);
+
+	if (lock->lk_is_new) {
+		READ_BUF(36);
+		READ32(lock->lk_new_open_seqid);
+		READ32(lock->lk_new_open_stateid.si_generation);
+
+		COPYMEM(&lock->lk_new_open_stateid.si_opaque, sizeof(stateid_opaque_t));
+		READ32(lock->lk_new_lock_seqid);
+		COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t));
+		READ32(lock->lk_new_owner.len);
+		READ_BUF(lock->lk_new_owner.len);
+		READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len);
+	} else {
+		READ_BUF(20);
+		READ32(lock->lk_old_lock_stateid.si_generation);
+		COPYMEM(&lock->lk_old_lock_stateid.si_opaque, sizeof(stateid_opaque_t));
+		READ32(lock->lk_old_lock_seqid);
+	}
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
+{
+	DECODE_HEAD;
+		        
+	READ_BUF(32);
+	READ32(lockt->lt_type);
+	if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT))
+		goto xdr_error;
+	READ64(lockt->lt_offset);
+	READ64(lockt->lt_length);
+	COPYMEM(&lockt->lt_clientid, 8);
+	READ32(lockt->lt_owner.len);
+	READ_BUF(lockt->lt_owner.len);
+	READMEM(lockt->lt_owner.data, lockt->lt_owner.len);
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku)
+{
+	DECODE_HEAD;
+
+	locku->lu_stateowner = NULL;
+	READ_BUF(24 + sizeof(stateid_t));
+	READ32(locku->lu_type);
+	if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT))
+		goto xdr_error;
+	READ32(locku->lu_seqid);
+	READ32(locku->lu_stateid.si_generation);
+	COPYMEM(&locku->lu_stateid.si_opaque, sizeof(stateid_opaque_t));
+	READ64(locku->lu_offset);
+	READ64(locku->lu_length);
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup)
+{
+	DECODE_HEAD;
+
+	READ_BUF(4);
+	READ32(lookup->lo_len);
+	READ_BUF(lookup->lo_len);
+	SAVEMEM(lookup->lo_name, lookup->lo_len);
+	if ((status = check_filename(lookup->lo_name, lookup->lo_len, nfserr_noent)))
+		return status;
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
+{
+	DECODE_HEAD;
+
+	memset(open->op_bmval, 0, sizeof(open->op_bmval));
+	open->op_iattr.ia_valid = 0;
+	open->op_stateowner = NULL;
+
+	/* seqid, share_access, share_deny, clientid, ownerlen */
+	READ_BUF(16 + sizeof(clientid_t));
+	READ32(open->op_seqid);
+	READ32(open->op_share_access);
+	READ32(open->op_share_deny);
+	COPYMEM(&open->op_clientid, sizeof(clientid_t));
+	READ32(open->op_owner.len);
+
+	/* owner, open_flag */
+	READ_BUF(open->op_owner.len + 4);
+	SAVEMEM(open->op_owner.data, open->op_owner.len);
+	READ32(open->op_create);
+	switch (open->op_create) {
+	case NFS4_OPEN_NOCREATE:
+		break;
+	case NFS4_OPEN_CREATE:
+		READ_BUF(4);
+		READ32(open->op_createmode);
+		switch (open->op_createmode) {
+		case NFS4_CREATE_UNCHECKED:
+		case NFS4_CREATE_GUARDED:
+			if ((status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr, &open->op_acl)))
+				goto out;
+			break;
+		case NFS4_CREATE_EXCLUSIVE:
+			READ_BUF(8);
+			COPYMEM(open->op_verf.data, 8);
+			break;
+		default:
+			goto xdr_error;
+		}
+		break;
+	default:
+		goto xdr_error;
+	}
+
+	/* open_claim */
+	READ_BUF(4);
+	READ32(open->op_claim_type);
+	switch (open->op_claim_type) {
+	case NFS4_OPEN_CLAIM_NULL:
+	case NFS4_OPEN_CLAIM_DELEGATE_PREV:
+		READ_BUF(4);
+		READ32(open->op_fname.len);
+		READ_BUF(open->op_fname.len);
+		SAVEMEM(open->op_fname.data, open->op_fname.len);
+		if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval)))
+			return status;
+		break;
+	case NFS4_OPEN_CLAIM_PREVIOUS:
+		READ_BUF(4);
+		READ32(open->op_delegate_type);
+		break;
+	case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+		READ_BUF(sizeof(stateid_t) + 4);
+		COPYMEM(&open->op_delegate_stateid, sizeof(stateid_t));
+		READ32(open->op_fname.len);
+		READ_BUF(open->op_fname.len);
+		SAVEMEM(open->op_fname.data, open->op_fname.len);
+		if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval)))
+			return status;
+		break;
+	default:
+		goto xdr_error;
+	}
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf)
+{
+	DECODE_HEAD;
+		    
+	open_conf->oc_stateowner = NULL;
+	READ_BUF(4 + sizeof(stateid_t));
+	READ32(open_conf->oc_req_stateid.si_generation);
+	COPYMEM(&open_conf->oc_req_stateid.si_opaque, sizeof(stateid_opaque_t));
+	READ32(open_conf->oc_seqid);
+						        
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down)
+{
+	DECODE_HEAD;
+		    
+	open_down->od_stateowner = NULL;
+	READ_BUF(12 + sizeof(stateid_t));
+	READ32(open_down->od_stateid.si_generation);
+	COPYMEM(&open_down->od_stateid.si_opaque, sizeof(stateid_opaque_t));
+	READ32(open_down->od_seqid);
+	READ32(open_down->od_share_access);
+	READ32(open_down->od_share_deny);
+						        
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)
+{
+	DECODE_HEAD;
+
+	READ_BUF(4);
+	READ32(putfh->pf_fhlen);
+	if (putfh->pf_fhlen > NFS4_FHSIZE)
+		goto xdr_error;
+	READ_BUF(putfh->pf_fhlen);
+	SAVEMEM(putfh->pf_fhval, putfh->pf_fhlen);
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
+{
+	DECODE_HEAD;
+
+	READ_BUF(sizeof(stateid_t) + 12);
+	READ32(read->rd_stateid.si_generation);
+	COPYMEM(&read->rd_stateid.si_opaque, sizeof(stateid_opaque_t));
+	READ64(read->rd_offset);
+	READ32(read->rd_length);
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir)
+{
+	DECODE_HEAD;
+
+	READ_BUF(24);
+	READ64(readdir->rd_cookie);
+	COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data));
+	READ32(readdir->rd_dircount);    /* just in case you needed a useless field... */
+	READ32(readdir->rd_maxcount);
+	if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval)))
+		goto out;
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove)
+{
+	DECODE_HEAD;
+
+	READ_BUF(4);
+	READ32(remove->rm_namelen);
+	READ_BUF(remove->rm_namelen);
+	SAVEMEM(remove->rm_name, remove->rm_namelen);
+	if ((status = check_filename(remove->rm_name, remove->rm_namelen, nfserr_noent)))
+		return status;
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename)
+{
+	DECODE_HEAD;
+
+	READ_BUF(4);
+	READ32(rename->rn_snamelen);
+	READ_BUF(rename->rn_snamelen + 4);
+	SAVEMEM(rename->rn_sname, rename->rn_snamelen);
+	READ32(rename->rn_tnamelen);
+	READ_BUF(rename->rn_tnamelen);
+	SAVEMEM(rename->rn_tname, rename->rn_tnamelen);
+	if ((status = check_filename(rename->rn_sname, rename->rn_snamelen, nfserr_noent)))
+		return status;
+	if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen, nfserr_inval)))
+		return status;
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid)
+{
+	DECODE_HEAD;
+
+	READ_BUF(sizeof(clientid_t));
+	COPYMEM(clientid, sizeof(clientid_t));
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr)
+{
+	DECODE_HEAD;
+
+	READ_BUF(sizeof(stateid_t));
+	READ32(setattr->sa_stateid.si_generation);
+	COPYMEM(&setattr->sa_stateid.si_opaque, sizeof(stateid_opaque_t));
+	if ((status = nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, &setattr->sa_acl)))
+		goto out;
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid)
+{
+	DECODE_HEAD;
+
+	READ_BUF(12);
+	COPYMEM(setclientid->se_verf.data, 8);
+	READ32(setclientid->se_namelen);
+
+	READ_BUF(setclientid->se_namelen + 8);
+	SAVEMEM(setclientid->se_name, setclientid->se_namelen);
+	READ32(setclientid->se_callback_prog);
+	READ32(setclientid->se_callback_netid_len);
+
+	READ_BUF(setclientid->se_callback_netid_len + 4);
+	SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len);
+	READ32(setclientid->se_callback_addr_len);
+
+	READ_BUF(setclientid->se_callback_addr_len + 4);
+	SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len);
+	READ32(setclientid->se_callback_ident);
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c)
+{
+	DECODE_HEAD;
+
+	READ_BUF(8 + sizeof(nfs4_verifier));
+	COPYMEM(&scd_c->sc_clientid, 8);
+	COPYMEM(&scd_c->sc_confirm, sizeof(nfs4_verifier));
+
+	DECODE_TAIL;
+}
+
+/* Also used for NVERIFY */
+static int
+nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify)
+{
+#if 0
+	struct nfsd4_compoundargs save = {
+		.p = argp->p,
+		.end = argp->end,
+		.rqstp = argp->rqstp,
+	};
+	u32             ve_bmval[2];
+	struct iattr    ve_iattr;           /* request */
+	struct nfs4_acl *ve_acl;            /* request */
+#endif
+	DECODE_HEAD;
+
+	if ((status = nfsd4_decode_bitmap(argp, verify->ve_bmval)))
+		goto out;
+
+	/* For convenience's sake, we compare raw xdr'd attributes in
+	 * nfsd4_proc_verify; however we still decode here just to return
+	 * correct error in case of bad xdr. */
+#if 0
+	status = nfsd4_decode_fattr(ve_bmval, &ve_iattr, &ve_acl);
+	if (status == nfserr_inval) {
+		status = nfserrno(status);
+		goto out;
+	}
+#endif
+	READ_BUF(4);
+	READ32(verify->ve_attrlen);
+	READ_BUF(verify->ve_attrlen);
+	SAVEMEM(verify->ve_attrval, verify->ve_attrlen);
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
+{
+	int avail;
+	int v;
+	int len;
+	DECODE_HEAD;
+
+	READ_BUF(sizeof(stateid_opaque_t) + 20);
+	READ32(write->wr_stateid.si_generation);
+	COPYMEM(&write->wr_stateid.si_opaque, sizeof(stateid_opaque_t));
+	READ64(write->wr_offset);
+	READ32(write->wr_stable_how);
+	if (write->wr_stable_how > 2)
+		goto xdr_error;
+	READ32(write->wr_buflen);
+
+	/* Sorry .. no magic macros for this.. *
+	 * READ_BUF(write->wr_buflen);
+	 * SAVEMEM(write->wr_buf, write->wr_buflen);
+	 */
+	avail = (char*)argp->end - (char*)argp->p;
+	if (avail + argp->pagelen < write->wr_buflen) {
+		printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); 
+		goto xdr_error;
+	}
+	write->wr_vec[0].iov_base = p;
+	write->wr_vec[0].iov_len = avail;
+	v = 0;
+	len = write->wr_buflen;
+	while (len > write->wr_vec[v].iov_len) {
+		len -= write->wr_vec[v].iov_len;
+		v++;
+		write->wr_vec[v].iov_base = page_address(argp->pagelist[0]);
+		argp->pagelist++;
+		if (argp->pagelen >= PAGE_SIZE) {
+			write->wr_vec[v].iov_len = PAGE_SIZE;
+			argp->pagelen -= PAGE_SIZE;
+		} else {
+			write->wr_vec[v].iov_len = argp->pagelen;
+			argp->pagelen -= len;
+		}
+	}
+	argp->end = (u32*) (write->wr_vec[v].iov_base + write->wr_vec[v].iov_len);
+	argp->p = (u32*)  (write->wr_vec[v].iov_base + (XDR_QUADLEN(len) << 2));
+	write->wr_vec[v].iov_len = len;
+	write->wr_vlen = v+1;
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner)
+{
+	DECODE_HEAD;
+
+	READ_BUF(12);
+	COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t));
+	READ32(rlockowner->rl_owner.len);
+	READ_BUF(rlockowner->rl_owner.len);
+	READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len);
+
+	DECODE_TAIL;
+}
+
+static int
+nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
+{
+	DECODE_HEAD;
+	struct nfsd4_op *op;
+	int i;
+
+	/*
+	 * XXX: According to spec, we should check the tag
+	 * for UTF-8 compliance.  I'm postponing this for
+	 * now because it seems that some clients do use
+	 * binary tags.
+	 */
+	READ_BUF(4);
+	READ32(argp->taglen);
+	READ_BUF(argp->taglen + 8);
+	SAVEMEM(argp->tag, argp->taglen);
+	READ32(argp->minorversion);
+	READ32(argp->opcnt);
+
+	if (argp->taglen > NFSD4_MAX_TAGLEN)
+		goto xdr_error;
+	if (argp->opcnt > 100)
+		goto xdr_error;
+
+	if (argp->opcnt > sizeof(argp->iops)/sizeof(argp->iops[0])) {
+		argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
+		if (!argp->ops) {
+			argp->ops = argp->iops;
+			printk(KERN_INFO "nfsd: couldn't allocate room for COMPOUND\n");
+			goto xdr_error;
+		}
+	}
+
+	for (i = 0; i < argp->opcnt; i++) {
+		op = &argp->ops[i];
+		op->replay = NULL;
+
+		/*
+		 * We can't use READ_BUF() here because we need to handle
+		 * a missing opcode as an OP_WRITE + 1. So we need to check
+		 * to see if we're truly at the end of our buffer or if there
+		 * is another page we need to flip to.
+		 */
+
+		if (argp->p == argp->end) {
+			if (argp->pagelen < 4) {
+				/* There isn't an opcode still on the wire */
+				op->opnum = OP_WRITE + 1;
+				op->status = nfserr_bad_xdr;
+				argp->opcnt = i+1;
+				break;
+			}
+
+			/*
+			 * False alarm. We just hit a page boundary, but there
+			 * is still data available.  Move pointer across page
+			 * boundary.  *snip from READ_BUF*
+			 */
+			argp->p = page_address(argp->pagelist[0]);
+			argp->pagelist++;
+			if (argp->pagelen < PAGE_SIZE) {
+				argp->end = p + (argp->pagelen>>2);
+				argp->pagelen = 0;
+			} else {
+				argp->end = p + (PAGE_SIZE>>2);
+				argp->pagelen -= PAGE_SIZE;
+			}
+		}
+		op->opnum = ntohl(*argp->p++);
+
+		switch (op->opnum) {
+		case 2: /* Reserved operation */
+			op->opnum = OP_ILLEGAL;
+			if (argp->minorversion == 0)
+				op->status = nfserr_op_illegal;
+			else
+				op->status = nfserr_minor_vers_mismatch;
+			break;
+		case OP_ACCESS:
+			op->status = nfsd4_decode_access(argp, &op->u.access);
+			break;
+		case OP_CLOSE:
+			op->status = nfsd4_decode_close(argp, &op->u.close);
+			break;
+		case OP_COMMIT:
+			op->status = nfsd4_decode_commit(argp, &op->u.commit);
+			break;
+		case OP_CREATE:
+			op->status = nfsd4_decode_create(argp, &op->u.create);
+			break;
+		case OP_DELEGRETURN:
+			op->status = nfsd4_decode_delegreturn(argp, &op->u.delegreturn);
+			break;
+		case OP_GETATTR:
+			op->status = nfsd4_decode_getattr(argp, &op->u.getattr);
+			break;
+		case OP_GETFH:
+			op->status = nfs_ok;
+			break;
+		case OP_LINK:
+			op->status = nfsd4_decode_link(argp, &op->u.link);
+			break;
+		case OP_LOCK:
+			op->status = nfsd4_decode_lock(argp, &op->u.lock);
+			break;
+		case OP_LOCKT:
+			op->status = nfsd4_decode_lockt(argp, &op->u.lockt);
+			break;
+		case OP_LOCKU:
+			op->status = nfsd4_decode_locku(argp, &op->u.locku);
+			break;
+		case OP_LOOKUP:
+			op->status = nfsd4_decode_lookup(argp, &op->u.lookup);
+			break;
+		case OP_LOOKUPP:
+			op->status = nfs_ok;
+			break;
+		case OP_NVERIFY:
+			op->status = nfsd4_decode_verify(argp, &op->u.nverify);
+			break;
+		case OP_OPEN:
+			op->status = nfsd4_decode_open(argp, &op->u.open);
+			break;
+		case OP_OPEN_CONFIRM:
+			op->status = nfsd4_decode_open_confirm(argp, &op->u.open_confirm);
+			break;
+		case OP_OPEN_DOWNGRADE:
+			op->status = nfsd4_decode_open_downgrade(argp, &op->u.open_downgrade);
+			break;
+		case OP_PUTFH:
+			op->status = nfsd4_decode_putfh(argp, &op->u.putfh);
+			break;
+		case OP_PUTROOTFH:
+			op->status = nfs_ok;
+			break;
+		case OP_READ:
+			op->status = nfsd4_decode_read(argp, &op->u.read);
+			break;
+		case OP_READDIR:
+			op->status = nfsd4_decode_readdir(argp, &op->u.readdir);
+			break;
+		case OP_READLINK:
+			op->status = nfs_ok;
+			break;
+		case OP_REMOVE:
+			op->status = nfsd4_decode_remove(argp, &op->u.remove);
+			break;
+		case OP_RENAME:
+			op->status = nfsd4_decode_rename(argp, &op->u.rename);
+			break;
+		case OP_RESTOREFH:
+			op->status = nfs_ok;
+			break;
+		case OP_RENEW:
+			op->status = nfsd4_decode_renew(argp, &op->u.renew);
+			break;
+		case OP_SAVEFH:
+			op->status = nfs_ok;
+			break;
+		case OP_SETATTR:
+			op->status = nfsd4_decode_setattr(argp, &op->u.setattr);
+			break;
+		case OP_SETCLIENTID:
+			op->status = nfsd4_decode_setclientid(argp, &op->u.setclientid);
+			break;
+		case OP_SETCLIENTID_CONFIRM:
+			op->status = nfsd4_decode_setclientid_confirm(argp, &op->u.setclientid_confirm);
+			break;
+		case OP_VERIFY:
+			op->status = nfsd4_decode_verify(argp, &op->u.verify);
+			break;
+		case OP_WRITE:
+			op->status = nfsd4_decode_write(argp, &op->u.write);
+			break;
+		case OP_RELEASE_LOCKOWNER:
+			op->status = nfsd4_decode_release_lockowner(argp, &op->u.release_lockowner);
+			break;
+		default:
+			op->opnum = OP_ILLEGAL;
+			op->status = nfserr_op_illegal;
+			break;
+		}
+
+		if (op->status) {
+			argp->opcnt = i+1;
+			break;
+		}
+	}
+
+	DECODE_TAIL;
+}
+/*
+ * END OF "GENERIC" DECODE ROUTINES.
+ */
+
+/*
+ * START OF "GENERIC" ENCODE ROUTINES.
+ *   These may look a little ugly since they are imported from a "generic"
+ * set of XDR encode/decode routines which are intended to be shared by
+ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
+ *
+ * If the pain of reading these is too great, it should be a straightforward
+ * task to translate them into Linux-specific versions which are more
+ * consistent with the style used in NFSv2/v3...
+ */
+#define ENCODE_HEAD              u32 *p
+
+#define WRITE32(n)               *p++ = htonl(n)
+#define WRITE64(n)               do {				\
+	*p++ = htonl((u32)((n) >> 32));				\
+	*p++ = htonl((u32)(n));					\
+} while (0)
+#define WRITEMEM(ptr,nbytes)     do {				\
+	*(p + XDR_QUADLEN(nbytes) -1) = 0;                      \
+	memcpy(p, ptr, nbytes);					\
+	p += XDR_QUADLEN(nbytes);				\
+} while (0)
+#define WRITECINFO(c)		do {				\
+	*p++ = htonl(c.atomic);					\
+	*p++ = htonl(c.before_ctime_sec);				\
+	*p++ = htonl(c.before_ctime_nsec);				\
+	*p++ = htonl(c.after_ctime_sec);				\
+	*p++ = htonl(c.after_ctime_nsec);				\
+} while (0)
+
+#define RESERVE_SPACE(nbytes)	do {				\
+	p = resp->p;						\
+	BUG_ON(p + XDR_QUADLEN(nbytes) > resp->end);		\
+} while (0)
+#define ADJUST_ARGS()		resp->p = p
+
+/*
+ * Header routine to setup seqid operation replay cache
+ */
+#define ENCODE_SEQID_OP_HEAD					\
+	u32 *p;							\
+	u32 *save;						\
+								\
+	save = resp->p;
+
+/*
+ * Routine for encoding the result of a
+ * "seqid-mutating" NFSv4 operation.  This is
+ * where seqids are incremented, and the
+ * replay cache is filled.
+ */
+
+#define ENCODE_SEQID_OP_TAIL(stateowner) do {			\
+	if (seqid_mutating_err(nfserr) && stateowner) { 	\
+		if (stateowner->so_confirmed)			\
+			stateowner->so_seqid++;			\
+		stateowner->so_replay.rp_status = nfserr;   	\
+		stateowner->so_replay.rp_buflen = 		\
+			  (((char *)(resp)->p - (char *)save)); \
+		memcpy(stateowner->so_replay.rp_buf, save,      \
+ 			stateowner->so_replay.rp_buflen); 	\
+	} } while (0);
+
+
+static u32 nfs4_ftypes[16] = {
+        NF4BAD,  NF4FIFO, NF4CHR, NF4BAD,
+        NF4DIR,  NF4BAD,  NF4BLK, NF4BAD,
+        NF4REG,  NF4BAD,  NF4LNK, NF4BAD,
+        NF4SOCK, NF4BAD,  NF4LNK, NF4BAD,
+};
+
+static int
+nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
+			u32 **p, int *buflen)
+{
+	int status;
+
+	if (*buflen < (XDR_QUADLEN(IDMAP_NAMESZ) << 2) + 4)
+		return nfserr_resource;
+	if (whotype != NFS4_ACL_WHO_NAMED)
+		status = nfs4_acl_write_who(whotype, (u8 *)(*p + 1));
+	else if (group)
+		status = nfsd_map_gid_to_name(rqstp, id, (u8 *)(*p + 1));
+	else
+		status = nfsd_map_uid_to_name(rqstp, id, (u8 *)(*p + 1));
+	if (status < 0)
+		return nfserrno(status);
+	*p = xdr_encode_opaque(*p, NULL, status);
+	*buflen -= (XDR_QUADLEN(status) << 2) + 4;
+	BUG_ON(*buflen < 0);
+	return 0;
+}
+
+static inline int
+nfsd4_encode_user(struct svc_rqst *rqstp, uid_t uid, u32 **p, int *buflen)
+{
+	return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, uid, 0, p, buflen);
+}
+
+static inline int
+nfsd4_encode_group(struct svc_rqst *rqstp, uid_t gid, u32 **p, int *buflen)
+{
+	return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, gid, 1, p, buflen);
+}
+
+static inline int
+nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
+		u32 **p, int *buflen)
+{
+	return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen);
+}
+
+
+/*
+ * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
+ * ourselves.
+ *
+ * @countp is the buffer size in _words_; upon successful return this becomes
+ * replaced with the number of words written.
+ */
+int
+nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
+		struct dentry *dentry, u32 *buffer, int *countp, u32 *bmval,
+		struct svc_rqst *rqstp)
+{
+	u32 bmval0 = bmval[0];
+	u32 bmval1 = bmval[1];
+	struct kstat stat;
+	struct svc_fh tempfh;
+	struct kstatfs statfs;
+	int buflen = *countp << 2;
+	u32 *attrlenp;
+	u32 dummy;
+	u64 dummy64;
+	u32 *p = buffer;
+	int status;
+	int aclsupport = 0;
+	struct nfs4_acl *acl = NULL;
+
+	BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
+	BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0);
+	BUG_ON(bmval1 & ~NFSD_SUPPORTED_ATTRS_WORD1);
+
+	status = vfs_getattr(exp->ex_mnt, dentry, &stat);
+	if (status)
+		goto out_nfserr;
+	if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL)) ||
+	    (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
+		       FATTR4_WORD1_SPACE_TOTAL))) {
+		status = vfs_statfs(dentry->d_inode->i_sb, &statfs);
+		if (status)
+			goto out_nfserr;
+	}
+	if ((bmval0 & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) {
+		fh_init(&tempfh, NFS4_FHSIZE);
+		status = fh_compose(&tempfh, exp, dentry, NULL);
+		if (status)
+			goto out;
+		fhp = &tempfh;
+	}
+	if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT
+			| FATTR4_WORD0_SUPPORTED_ATTRS)) {
+		status = nfsd4_get_nfs4_acl(rqstp, dentry, &acl);
+		aclsupport = (status == 0);
+		if (bmval0 & FATTR4_WORD0_ACL) {
+			if (status == -EOPNOTSUPP)
+				bmval0 &= ~FATTR4_WORD0_ACL;
+			else if (status == -EINVAL) {
+				status = nfserr_attrnotsupp;
+				goto out;
+			} else if (status != 0)
+				goto out_nfserr;
+		}
+	}
+	if ((buflen -= 16) < 0)
+		goto out_resource;
+
+	WRITE32(2);
+	WRITE32(bmval0);
+	WRITE32(bmval1);
+	attrlenp = p++;                /* to be backfilled later */
+
+	if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
+		if ((buflen -= 12) < 0)
+			goto out_resource;
+		WRITE32(2);
+		WRITE32(aclsupport ?
+			NFSD_SUPPORTED_ATTRS_WORD0 :
+			NFSD_SUPPORTED_ATTRS_WORD0 & ~FATTR4_WORD0_ACL);
+		WRITE32(NFSD_SUPPORTED_ATTRS_WORD1);
+	}
+	if (bmval0 & FATTR4_WORD0_TYPE) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		dummy = nfs4_ftypes[(stat.mode & S_IFMT) >> 12];
+		if (dummy == NF4BAD)
+			goto out_serverfault;
+		WRITE32(dummy);
+	}
+	if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32( NFS4_FH_NOEXPIRE_WITH_OPEN | NFS4_FH_VOL_RENAME );
+	}
+	if (bmval0 & FATTR4_WORD0_CHANGE) {
+		/*
+		 * Note: This _must_ be consistent with the scheme for writing
+		 * change_info, so any changes made here must be reflected there
+		 * as well.  (See xdr4.h:set_change_info() and the WRITECINFO()
+		 * macro above.)
+		 */
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE32(stat.ctime.tv_sec);
+		WRITE32(stat.ctime.tv_nsec);
+	}
+	if (bmval0 & FATTR4_WORD0_SIZE) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE64(stat.size);
+	}
+	if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(1);
+	}
+	if (bmval0 & FATTR4_WORD0_SYMLINK_SUPPORT) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(1);
+	}
+	if (bmval0 & FATTR4_WORD0_NAMED_ATTR) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(0);
+	}
+	if (bmval0 & FATTR4_WORD0_FSID) {
+		if ((buflen -= 16) < 0)
+			goto out_resource;
+		if (is_fsid(fhp, rqstp->rq_reffh)) {
+			WRITE64((u64)exp->ex_fsid);
+			WRITE64((u64)0);
+		} else {
+			WRITE32(0);
+			WRITE32(MAJOR(stat.dev));
+			WRITE32(0);
+			WRITE32(MINOR(stat.dev));
+		}
+	}
+	if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(0);
+	}
+	if (bmval0 & FATTR4_WORD0_LEASE_TIME) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(NFSD_LEASE_TIME);
+	}
+	if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(0);
+	}
+	if (bmval0 & FATTR4_WORD0_ACL) {
+		struct nfs4_ace *ace;
+		struct list_head *h;
+
+		if (acl == NULL) {
+			if ((buflen -= 4) < 0)
+				goto out_resource;
+
+			WRITE32(0);
+			goto out_acl;
+		}
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(acl->naces);
+
+		list_for_each(h, &acl->ace_head) {
+			ace = list_entry(h, struct nfs4_ace, l_ace);
+
+			if ((buflen -= 4*3) < 0)
+				goto out_resource;
+			WRITE32(ace->type);
+			WRITE32(ace->flag);
+			WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL);
+			status = nfsd4_encode_aclname(rqstp, ace->whotype,
+				ace->who, ace->flag & NFS4_ACE_IDENTIFIER_GROUP,
+				&p, &buflen);
+			if (status == nfserr_resource)
+				goto out_resource;
+			if (status)
+				goto out;
+		}
+	}
+out_acl:
+	if (bmval0 & FATTR4_WORD0_ACLSUPPORT) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(aclsupport ?
+			ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0);
+	}
+	if (bmval0 & FATTR4_WORD0_CANSETTIME) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(1);
+	}
+	if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(1);
+	}
+	if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(1);
+	}
+	if (bmval0 & FATTR4_WORD0_CHOWN_RESTRICTED) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(1);
+	}
+	if (bmval0 & FATTR4_WORD0_FILEHANDLE) {
+		buflen -= (XDR_QUADLEN(fhp->fh_handle.fh_size) << 2) + 4;
+		if (buflen < 0)
+			goto out_resource;
+		WRITE32(fhp->fh_handle.fh_size);
+		WRITEMEM(&fhp->fh_handle.fh_base, fhp->fh_handle.fh_size);
+	}
+	if (bmval0 & FATTR4_WORD0_FILEID) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE64((u64) stat.ino);
+	}
+	if (bmval0 & FATTR4_WORD0_FILES_AVAIL) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE64((u64) statfs.f_ffree);
+	}
+	if (bmval0 & FATTR4_WORD0_FILES_FREE) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE64((u64) statfs.f_ffree);
+	}
+	if (bmval0 & FATTR4_WORD0_FILES_TOTAL) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE64((u64) statfs.f_files);
+	}
+	if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(1);
+	}
+	if (bmval0 & FATTR4_WORD0_MAXFILESIZE) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE64(~(u64)0);
+	}
+	if (bmval0 & FATTR4_WORD0_MAXLINK) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(255);
+	}
+	if (bmval0 & FATTR4_WORD0_MAXNAME) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(~(u32) 0);
+	}
+	if (bmval0 & FATTR4_WORD0_MAXREAD) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE64((u64) NFSSVC_MAXBLKSIZE);
+	}
+	if (bmval0 & FATTR4_WORD0_MAXWRITE) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE64((u64) NFSSVC_MAXBLKSIZE);
+	}
+	if (bmval1 & FATTR4_WORD1_MODE) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(stat.mode & S_IALLUGO);
+	}
+	if (bmval1 & FATTR4_WORD1_NO_TRUNC) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(1);
+	}
+	if (bmval1 & FATTR4_WORD1_NUMLINKS) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(stat.nlink);
+	}
+	if (bmval1 & FATTR4_WORD1_OWNER) {
+		status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen);
+		if (status == nfserr_resource)
+			goto out_resource;
+		if (status)
+			goto out;
+	}
+	if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
+		status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen);
+		if (status == nfserr_resource)
+			goto out_resource;
+		if (status)
+			goto out;
+	}
+	if (bmval1 & FATTR4_WORD1_RAWDEV) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE32((u32) MAJOR(stat.rdev));
+		WRITE32((u32) MINOR(stat.rdev));
+	}
+	if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		dummy64 = (u64)statfs.f_bavail * (u64)statfs.f_bsize;
+		WRITE64(dummy64);
+	}
+	if (bmval1 & FATTR4_WORD1_SPACE_FREE) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		dummy64 = (u64)statfs.f_bfree * (u64)statfs.f_bsize;
+		WRITE64(dummy64);
+	}
+	if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		dummy64 = (u64)statfs.f_blocks * (u64)statfs.f_bsize;
+		WRITE64(dummy64);
+	}
+	if (bmval1 & FATTR4_WORD1_SPACE_USED) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		dummy64 = (u64)stat.blocks << 9;
+		WRITE64(dummy64);
+	}
+	if (bmval1 & FATTR4_WORD1_TIME_ACCESS) {
+		if ((buflen -= 12) < 0)
+			goto out_resource;
+		WRITE32(0);
+		WRITE32(stat.atime.tv_sec);
+		WRITE32(stat.atime.tv_nsec);
+	}
+	if (bmval1 & FATTR4_WORD1_TIME_DELTA) {
+		if ((buflen -= 12) < 0)
+			goto out_resource;
+		WRITE32(0);
+		WRITE32(1);
+		WRITE32(0);
+	}
+	if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
+		if ((buflen -= 12) < 0)
+			goto out_resource;
+		WRITE32(0);
+		WRITE32(stat.ctime.tv_sec);
+		WRITE32(stat.ctime.tv_nsec);
+	}
+	if (bmval1 & FATTR4_WORD1_TIME_MODIFY) {
+		if ((buflen -= 12) < 0)
+			goto out_resource;
+		WRITE32(0);
+		WRITE32(stat.mtime.tv_sec);
+		WRITE32(stat.mtime.tv_nsec);
+	}
+	if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
+		struct dentry *mnt_pnt, *mnt_root;
+
+		if ((buflen -= 8) < 0)
+                	goto out_resource;
+		mnt_root = exp->ex_mnt->mnt_root;
+		if (mnt_root->d_inode == dentry->d_inode) {
+			mnt_pnt = exp->ex_mnt->mnt_mountpoint;
+			WRITE64((u64) mnt_pnt->d_inode->i_ino);
+		} else
+                	WRITE64((u64) stat.ino);
+	}
+	*attrlenp = htonl((char *)p - (char *)attrlenp - 4);
+	*countp = p - buffer;
+	status = nfs_ok;
+
+out:
+	nfs4_acl_free(acl);
+	if (fhp == &tempfh)
+		fh_put(&tempfh);
+	return status;
+out_nfserr:
+	status = nfserrno(status);
+	goto out;
+out_resource:
+	*countp = 0;
+	status = nfserr_resource;
+	goto out;
+out_serverfault:
+	status = nfserr_serverfault;
+	goto out;
+}
+
+static int
+nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
+		const char *name, int namlen, u32 *p, int *buflen)
+{
+	struct svc_export *exp = cd->rd_fhp->fh_export;
+	struct dentry *dentry;
+	int nfserr;
+
+	dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
+	if (IS_ERR(dentry))
+		return nfserrno(PTR_ERR(dentry));
+
+	exp_get(exp);
+	if (d_mountpoint(dentry)) {
+		if (nfsd_cross_mnt(cd->rd_rqstp, &dentry, &exp)) {
+		/*
+		 * -EAGAIN is the only error returned from
+		 * nfsd_cross_mnt() and it indicates that an
+		 * up-call has  been initiated to fill in the export
+		 * options on exp.  When the answer comes back,
+		 * this call will be retried.
+		 */
+			nfserr = nfserr_dropit;
+			goto out_put;
+		}
+
+	}
+	nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval,
+					cd->rd_rqstp);
+out_put:
+	dput(dentry);
+	exp_put(exp);
+	return nfserr;
+}
+
+static u32 *
+nfsd4_encode_rdattr_error(u32 *p, int buflen, int nfserr)
+{
+	u32 *attrlenp;
+
+	if (buflen < 6)
+		return NULL;
+	*p++ = htonl(2);
+	*p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
+	*p++ = htonl(0);			 /* bmval1 */
+
+	attrlenp = p++;
+	*p++ = nfserr;       /* no htonl */
+	*attrlenp = htonl((char *)p - (char *)attrlenp - 4);
+	return p;
+}
+
+static int
+nfsd4_encode_dirent(struct readdir_cd *ccd, const char *name, int namlen,
+		    loff_t offset, ino_t ino, unsigned int d_type)
+{
+	struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
+	int buflen;
+	u32 *p = cd->buffer;
+	int nfserr = nfserr_toosmall;
+
+	/* In nfsv4, "." and ".." never make it onto the wire.. */
+	if (name && isdotent(name, namlen)) {
+		cd->common.err = nfs_ok;
+		return 0;
+	}
+
+	if (cd->offset)
+		xdr_encode_hyper(cd->offset, (u64) offset);
+
+	buflen = cd->buflen - 4 - XDR_QUADLEN(namlen);
+	if (buflen < 0)
+		goto fail;
+
+	*p++ = xdr_one;                             /* mark entry present */
+	cd->offset = p;                             /* remember pointer */
+	p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */
+	p = xdr_encode_array(p, name, namlen);      /* name length & name */
+
+	nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, p, &buflen);
+	switch (nfserr) {
+	case nfs_ok:
+		p += buflen;
+		break;
+	case nfserr_resource:
+		nfserr = nfserr_toosmall;
+		goto fail;
+	case nfserr_dropit:
+		goto fail;
+	default:
+		/*
+		 * If the client requested the RDATTR_ERROR attribute,
+		 * we stuff the error code into this attribute
+		 * and continue.  If this attribute was not requested,
+		 * then in accordance with the spec, we fail the
+		 * entire READDIR operation(!)
+		 */
+		if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
+			goto fail;
+		nfserr = nfserr_toosmall;
+		p = nfsd4_encode_rdattr_error(p, buflen, nfserr);
+		if (p == NULL)
+			goto fail;
+	}
+	cd->buflen -= (p - cd->buffer);
+	cd->buffer = p;
+	cd->common.err = nfs_ok;
+	return 0;
+fail:
+	cd->common.err = nfserr;
+	return -EINVAL;
+}
+
+static void
+nfsd4_encode_access(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_access *access)
+{
+	ENCODE_HEAD;
+
+	if (!nfserr) {
+		RESERVE_SPACE(8);
+		WRITE32(access->ac_supported);
+		WRITE32(access->ac_resp_access);
+		ADJUST_ARGS();
+	}
+}
+
+static void
+nfsd4_encode_close(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_close *close)
+{
+	ENCODE_SEQID_OP_HEAD;
+
+	if (!nfserr) {
+		RESERVE_SPACE(sizeof(stateid_t));
+		WRITE32(close->cl_stateid.si_generation);
+		WRITEMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t));
+		ADJUST_ARGS();
+	}
+	ENCODE_SEQID_OP_TAIL(close->cl_stateowner);
+}
+
+
+static void
+nfsd4_encode_commit(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_commit *commit)
+{
+	ENCODE_HEAD;
+
+	if (!nfserr) {
+		RESERVE_SPACE(8);
+		WRITEMEM(commit->co_verf.data, 8);
+		ADJUST_ARGS();
+	}
+}
+
+static void
+nfsd4_encode_create(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_create *create)
+{
+	ENCODE_HEAD;
+
+	if (!nfserr) {
+		RESERVE_SPACE(32);
+		WRITECINFO(create->cr_cinfo);
+		WRITE32(2);
+		WRITE32(create->cr_bmval[0]);
+		WRITE32(create->cr_bmval[1]);
+		ADJUST_ARGS();
+	}
+}
+
+static int
+nfsd4_encode_getattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_getattr *getattr)
+{
+	struct svc_fh *fhp = getattr->ga_fhp;
+	int buflen;
+
+	if (nfserr)
+		return nfserr;
+
+	buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2);
+	nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry,
+				    resp->p, &buflen, getattr->ga_bmval,
+				    resp->rqstp);
+
+	if (!nfserr)
+		resp->p += buflen;
+	return nfserr;
+}
+
+static void
+nfsd4_encode_getfh(struct nfsd4_compoundres *resp, int nfserr, struct svc_fh *fhp)
+{
+	unsigned int len;
+	ENCODE_HEAD;
+
+	if (!nfserr) {
+		len = fhp->fh_handle.fh_size;
+		RESERVE_SPACE(len + 4);
+		WRITE32(len);
+		WRITEMEM(&fhp->fh_handle.fh_base, len);
+		ADJUST_ARGS();
+	}
+}
+
+/*
+* Including all fields other than the name, a LOCK4denied structure requires
+*   8(clientid) + 4(namelen) + 8(offset) + 8(length) + 4(type) = 32 bytes.
+*/
+static void
+nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld)
+{
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(32 + XDR_LEN(ld->ld_sop ? ld->ld_sop->so_owner.len : 0));
+	WRITE64(ld->ld_start);
+	WRITE64(ld->ld_length);
+	WRITE32(ld->ld_type);
+	if (ld->ld_sop) {
+		WRITEMEM(&ld->ld_clientid, 8);
+		WRITE32(ld->ld_sop->so_owner.len);
+		WRITEMEM(ld->ld_sop->so_owner.data, ld->ld_sop->so_owner.len);
+		kref_put(&ld->ld_sop->so_ref, nfs4_free_stateowner);
+	}  else {  /* non - nfsv4 lock in conflict, no clientid nor owner */
+		WRITE64((u64)0); /* clientid */
+		WRITE32(0); /* length of owner name */
+	}
+	ADJUST_ARGS();
+}
+
+static void
+nfsd4_encode_lock(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_lock *lock)
+{
+
+	ENCODE_SEQID_OP_HEAD;
+
+	if (!nfserr) {
+		RESERVE_SPACE(4 + sizeof(stateid_t));
+		WRITE32(lock->lk_resp_stateid.si_generation);
+		WRITEMEM(&lock->lk_resp_stateid.si_opaque, sizeof(stateid_opaque_t));
+		ADJUST_ARGS();
+	} else if (nfserr == nfserr_denied)
+		nfsd4_encode_lock_denied(resp, &lock->lk_denied);
+
+	ENCODE_SEQID_OP_TAIL(lock->lk_stateowner);
+}
+
+static void
+nfsd4_encode_lockt(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_lockt *lockt)
+{
+	if (nfserr == nfserr_denied)
+		nfsd4_encode_lock_denied(resp, &lockt->lt_denied);
+}
+
+static void
+nfsd4_encode_locku(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_locku *locku)
+{
+	ENCODE_SEQID_OP_HEAD;
+
+	if (!nfserr) {
+		RESERVE_SPACE(sizeof(stateid_t));
+		WRITE32(locku->lu_stateid.si_generation);
+		WRITEMEM(&locku->lu_stateid.si_opaque, sizeof(stateid_opaque_t));
+		ADJUST_ARGS();
+	}
+				        
+	ENCODE_SEQID_OP_TAIL(locku->lu_stateowner);
+}
+
+
+static void
+nfsd4_encode_link(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_link *link)
+{
+	ENCODE_HEAD;
+
+	if (!nfserr) {
+		RESERVE_SPACE(20);
+		WRITECINFO(link->li_cinfo);
+		ADJUST_ARGS();
+	}
+}
+
+
+static void
+nfsd4_encode_open(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open *open)
+{
+	ENCODE_SEQID_OP_HEAD;
+
+	if (nfserr)
+		goto out;
+
+	RESERVE_SPACE(36 + sizeof(stateid_t));
+	WRITE32(open->op_stateid.si_generation);
+	WRITEMEM(&open->op_stateid.si_opaque, sizeof(stateid_opaque_t));
+	WRITECINFO(open->op_cinfo);
+	WRITE32(open->op_rflags);
+	WRITE32(2);
+	WRITE32(open->op_bmval[0]);
+	WRITE32(open->op_bmval[1]);
+	WRITE32(open->op_delegate_type);
+	ADJUST_ARGS();
+
+	switch (open->op_delegate_type) {
+	case NFS4_OPEN_DELEGATE_NONE:
+		break;
+	case NFS4_OPEN_DELEGATE_READ:
+		RESERVE_SPACE(20 + sizeof(stateid_t));
+		WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t));
+		WRITE32(0);
+
+		/*
+		 * TODO: ACE's in delegations
+		 */
+		WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE);
+		WRITE32(0);
+		WRITE32(0);
+		WRITE32(0);   /* XXX: is NULL principal ok? */
+		ADJUST_ARGS();
+		break;
+	case NFS4_OPEN_DELEGATE_WRITE:
+		RESERVE_SPACE(32 + sizeof(stateid_t));
+		WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t));
+		WRITE32(0);
+
+		/*
+		 * TODO: space_limit's in delegations
+		 */
+		WRITE32(NFS4_LIMIT_SIZE);
+		WRITE32(~(u32)0);
+		WRITE32(~(u32)0);
+
+		/*
+		 * TODO: ACE's in delegations
+		 */
+		WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE);
+		WRITE32(0);
+		WRITE32(0);
+		WRITE32(0);   /* XXX: is NULL principal ok? */
+		ADJUST_ARGS();
+		break;
+	default:
+		BUG();
+	}
+	/* XXX save filehandle here */
+out:
+	ENCODE_SEQID_OP_TAIL(open->op_stateowner);
+}
+
+static void
+nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open_confirm *oc)
+{
+	ENCODE_SEQID_OP_HEAD;
+				        
+	if (!nfserr) {
+		RESERVE_SPACE(sizeof(stateid_t));
+		WRITE32(oc->oc_resp_stateid.si_generation);
+		WRITEMEM(&oc->oc_resp_stateid.si_opaque, sizeof(stateid_opaque_t));
+		ADJUST_ARGS();
+	}
+
+	ENCODE_SEQID_OP_TAIL(oc->oc_stateowner);
+}
+
+static void
+nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open_downgrade *od)
+{
+	ENCODE_SEQID_OP_HEAD;
+				        
+	if (!nfserr) {
+		RESERVE_SPACE(sizeof(stateid_t));
+		WRITE32(od->od_stateid.si_generation);
+		WRITEMEM(&od->od_stateid.si_opaque, sizeof(stateid_opaque_t));
+		ADJUST_ARGS();
+	}
+
+	ENCODE_SEQID_OP_TAIL(od->od_stateowner);
+}
+
+static int
+nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read *read)
+{
+	u32 eof;
+	int v, pn;
+	unsigned long maxcount; 
+	long len;
+	ENCODE_HEAD;
+
+	if (nfserr)
+		return nfserr;
+	if (resp->xbuf->page_len)
+		return nfserr_resource;
+
+	RESERVE_SPACE(8); /* eof flag and byte count */
+
+	maxcount = NFSSVC_MAXBLKSIZE;
+	if (maxcount > read->rd_length)
+		maxcount = read->rd_length;
+
+	len = maxcount;
+	v = 0;
+	while (len > 0) {
+		pn = resp->rqstp->rq_resused;
+		svc_take_page(resp->rqstp);
+		read->rd_iov[v].iov_base = page_address(resp->rqstp->rq_respages[pn]);
+		read->rd_iov[v].iov_len = len < PAGE_SIZE ? len : PAGE_SIZE;
+		v++;
+		len -= PAGE_SIZE;
+	}
+	read->rd_vlen = v;
+
+	nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp,
+			read->rd_offset, read->rd_iov, read->rd_vlen,
+			&maxcount);
+
+	if (nfserr == nfserr_symlink)
+		nfserr = nfserr_inval;
+	if (nfserr)
+		return nfserr;
+	eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size);
+
+	WRITE32(eof);
+	WRITE32(maxcount);
+	ADJUST_ARGS();
+	resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base;
+
+	resp->xbuf->page_len = maxcount;
+
+	/* read zero bytes -> don't set up tail */
+	if(!maxcount)
+		return 0;        
+
+	/* set up page for remaining responses */
+	svc_take_page(resp->rqstp);
+	resp->xbuf->tail[0].iov_base = 
+		page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+	resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
+	resp->xbuf->tail[0].iov_len = 0;
+	resp->p = resp->xbuf->tail[0].iov_base;
+	resp->end = resp->p + PAGE_SIZE/4;
+
+	if (maxcount&3) {
+		*(resp->p)++ = 0;
+		resp->xbuf->tail[0].iov_base += maxcount&3;
+		resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
+	}
+	return 0;
+}
+
+static int
+nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_readlink *readlink)
+{
+	int maxcount;
+	char *page;
+	ENCODE_HEAD;
+
+	if (nfserr)
+		return nfserr;
+	if (resp->xbuf->page_len)
+		return nfserr_resource;
+
+	svc_take_page(resp->rqstp);
+	page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+
+	maxcount = PAGE_SIZE;
+	RESERVE_SPACE(4);
+
+	/*
+	 * XXX: By default, the ->readlink() VFS op will truncate symlinks
+	 * if they would overflow the buffer.  Is this kosher in NFSv4?  If
+	 * not, one easy fix is: if ->readlink() precisely fills the buffer,
+	 * assume that truncation occurred, and return NFS4ERR_RESOURCE.
+	 */
+	nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, page, &maxcount);
+	if (nfserr == nfserr_isdir)
+		return nfserr_inval;
+	if (nfserr)
+		return nfserr;
+
+	WRITE32(maxcount);
+	ADJUST_ARGS();
+	resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base;
+
+	svc_take_page(resp->rqstp);
+	resp->xbuf->tail[0].iov_base = 
+		page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+	resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
+	resp->xbuf->tail[0].iov_len = 0;
+	resp->p = resp->xbuf->tail[0].iov_base;
+	resp->end = resp->p + PAGE_SIZE/4;
+
+	resp->xbuf->page_len = maxcount;
+	if (maxcount&3) {
+		*(resp->p)++ = 0;
+		resp->xbuf->tail[0].iov_base += maxcount&3;
+		resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
+	}
+	return 0;
+}
+
+static int
+nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_readdir *readdir)
+{
+	int maxcount;
+	loff_t offset;
+	u32 *page, *savep;
+	ENCODE_HEAD;
+
+	if (nfserr)
+		return nfserr;
+	if (resp->xbuf->page_len)
+		return nfserr_resource;
+
+	RESERVE_SPACE(8);  /* verifier */
+	savep = p;
+
+	/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
+	WRITE32(0);
+	WRITE32(0);
+	ADJUST_ARGS();
+	resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base;
+
+	maxcount = PAGE_SIZE;
+	if (maxcount > readdir->rd_maxcount)
+		maxcount = readdir->rd_maxcount;
+
+	/*
+	 * Convert from bytes to words, account for the two words already
+	 * written, make sure to leave two words at the end for the next
+	 * pointer and eof field.
+	 */
+	maxcount = (maxcount >> 2) - 4;
+	if (maxcount < 0) {
+		nfserr =  nfserr_toosmall;
+		goto err_no_verf;
+	}
+
+	svc_take_page(resp->rqstp);
+	page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+	readdir->common.err = 0;
+	readdir->buflen = maxcount;
+	readdir->buffer = page;
+	readdir->offset = NULL;
+
+	offset = readdir->rd_cookie;
+	nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp,
+			      &offset,
+			      &readdir->common, nfsd4_encode_dirent);
+	if (nfserr == nfs_ok &&
+	    readdir->common.err == nfserr_toosmall &&
+	    readdir->buffer == page) 
+		nfserr = nfserr_toosmall;
+	if (nfserr == nfserr_symlink)
+		nfserr = nfserr_notdir;
+	if (nfserr)
+		goto err_no_verf;
+
+	if (readdir->offset)
+		xdr_encode_hyper(readdir->offset, offset);
+
+	p = readdir->buffer;
+	*p++ = 0;	/* no more entries */
+	*p++ = htonl(readdir->common.err == nfserr_eof);
+	resp->xbuf->page_len = ((char*)p) - (char*)page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+
+	/* allocate a page for the tail */
+	svc_take_page(resp->rqstp);
+	resp->xbuf->tail[0].iov_base = 
+		page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+	resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
+	resp->xbuf->tail[0].iov_len = 0;
+	resp->p = resp->xbuf->tail[0].iov_base;
+	resp->end = resp->p + PAGE_SIZE/4;
+
+	return 0;
+err_no_verf:
+	p = savep;
+	ADJUST_ARGS();
+	return nfserr;
+}
+
+static void
+nfsd4_encode_remove(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_remove *remove)
+{
+	ENCODE_HEAD;
+
+	if (!nfserr) {
+		RESERVE_SPACE(20);
+		WRITECINFO(remove->rm_cinfo);
+		ADJUST_ARGS();
+	}
+}
+
+static void
+nfsd4_encode_rename(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_rename *rename)
+{
+	ENCODE_HEAD;
+
+	if (!nfserr) {
+		RESERVE_SPACE(40);
+		WRITECINFO(rename->rn_sinfo);
+		WRITECINFO(rename->rn_tinfo);
+		ADJUST_ARGS();
+	}
+}
+
+/*
+ * The SETATTR encode routine is special -- it always encodes a bitmap,
+ * regardless of the error status.
+ */
+static void
+nfsd4_encode_setattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_setattr *setattr)
+{
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(12);
+	if (nfserr) {
+		WRITE32(2);
+		WRITE32(0);
+		WRITE32(0);
+	}
+	else {
+		WRITE32(2);
+		WRITE32(setattr->sa_bmval[0]);
+		WRITE32(setattr->sa_bmval[1]);
+	}
+	ADJUST_ARGS();
+}
+
+static void
+nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_setclientid *scd)
+{
+	ENCODE_HEAD;
+
+	if (!nfserr) {
+		RESERVE_SPACE(8 + sizeof(nfs4_verifier));
+		WRITEMEM(&scd->se_clientid, 8);
+		WRITEMEM(&scd->se_confirm, sizeof(nfs4_verifier));
+		ADJUST_ARGS();
+	}
+	else if (nfserr == nfserr_clid_inuse) {
+		RESERVE_SPACE(8);
+		WRITE32(0);
+		WRITE32(0);
+		ADJUST_ARGS();
+	}
+}
+
+static void
+nfsd4_encode_write(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_write *write)
+{
+	ENCODE_HEAD;
+
+	if (!nfserr) {
+		RESERVE_SPACE(16);
+		WRITE32(write->wr_bytes_written);
+		WRITE32(write->wr_how_written);
+		WRITEMEM(write->wr_verifier.data, 8);
+		ADJUST_ARGS();
+	}
+}
+
+void
+nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
+{
+	u32 *statp;
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(8);
+	WRITE32(op->opnum);
+	statp = p++;	/* to be backfilled at the end */
+	ADJUST_ARGS();
+
+	switch (op->opnum) {
+	case OP_ACCESS:
+		nfsd4_encode_access(resp, op->status, &op->u.access);
+		break;
+	case OP_CLOSE:
+		nfsd4_encode_close(resp, op->status, &op->u.close);
+		break;
+	case OP_COMMIT:
+		nfsd4_encode_commit(resp, op->status, &op->u.commit);
+		break;
+	case OP_CREATE:
+		nfsd4_encode_create(resp, op->status, &op->u.create);
+		break;
+	case OP_DELEGRETURN:
+		break;
+	case OP_GETATTR:
+		op->status = nfsd4_encode_getattr(resp, op->status, &op->u.getattr);
+		break;
+	case OP_GETFH:
+		nfsd4_encode_getfh(resp, op->status, op->u.getfh);
+		break;
+	case OP_LINK:
+		nfsd4_encode_link(resp, op->status, &op->u.link);
+		break;
+	case OP_LOCK:
+		nfsd4_encode_lock(resp, op->status, &op->u.lock);
+		break;
+	case OP_LOCKT:
+		nfsd4_encode_lockt(resp, op->status, &op->u.lockt);
+		break;
+	case OP_LOCKU:
+		nfsd4_encode_locku(resp, op->status, &op->u.locku);
+		break;
+	case OP_LOOKUP:
+		break;
+	case OP_LOOKUPP:
+		break;
+	case OP_NVERIFY:
+		break;
+	case OP_OPEN:
+		nfsd4_encode_open(resp, op->status, &op->u.open);
+		break;
+	case OP_OPEN_CONFIRM:
+		nfsd4_encode_open_confirm(resp, op->status, &op->u.open_confirm);
+		break;
+	case OP_OPEN_DOWNGRADE:
+		nfsd4_encode_open_downgrade(resp, op->status, &op->u.open_downgrade);
+		break;
+	case OP_PUTFH:
+		break;
+	case OP_PUTROOTFH:
+		break;
+	case OP_READ:
+		op->status = nfsd4_encode_read(resp, op->status, &op->u.read);
+		break;
+	case OP_READDIR:
+		op->status = nfsd4_encode_readdir(resp, op->status, &op->u.readdir);
+		break;
+	case OP_READLINK:
+		op->status = nfsd4_encode_readlink(resp, op->status, &op->u.readlink);
+		break;
+	case OP_REMOVE:
+		nfsd4_encode_remove(resp, op->status, &op->u.remove);
+		break;
+	case OP_RENAME:
+		nfsd4_encode_rename(resp, op->status, &op->u.rename);
+		break;
+	case OP_RENEW:
+		break;
+	case OP_RESTOREFH:
+		break;
+	case OP_SAVEFH:
+		break;
+	case OP_SETATTR:
+		nfsd4_encode_setattr(resp, op->status, &op->u.setattr);
+		break;
+	case OP_SETCLIENTID:
+		nfsd4_encode_setclientid(resp, op->status, &op->u.setclientid);
+		break;
+	case OP_SETCLIENTID_CONFIRM:
+		break;
+	case OP_VERIFY:
+		break;
+	case OP_WRITE:
+		nfsd4_encode_write(resp, op->status, &op->u.write);
+		break;
+	case OP_RELEASE_LOCKOWNER:
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * Note: We write the status directly, instead of using WRITE32(),
+	 * since it is already in network byte order.
+	 */
+	*statp = op->status;
+}
+
+/* 
+ * Encode the reply stored in the stateowner reply cache 
+ * 
+ * XDR note: do not encode rp->rp_buflen: the buffer contains the
+ * previously sent already encoded operation.
+ *
+ * called with nfs4_lock_state() held
+ */
+void
+nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
+{
+	ENCODE_HEAD;
+	struct nfs4_replay *rp = op->replay;
+
+	BUG_ON(!rp);
+
+	RESERVE_SPACE(8);
+	WRITE32(op->opnum);
+	*p++ = rp->rp_status;  /* already xdr'ed */
+	ADJUST_ARGS();
+
+	RESERVE_SPACE(rp->rp_buflen);
+	WRITEMEM(rp->rp_buf, rp->rp_buflen);
+	ADJUST_ARGS();
+}
+
+/*
+ * END OF "GENERIC" ENCODE ROUTINES.
+ */
+
+int
+nfs4svc_encode_voidres(struct svc_rqst *rqstp, u32 *p, void *dummy)
+{
+        return xdr_ressize_check(rqstp, p);
+}
+
+void nfsd4_release_compoundargs(struct nfsd4_compoundargs *args)
+{
+	if (args->ops != args->iops) {
+		kfree(args->ops);
+		args->ops = args->iops;
+	}
+	if (args->tmpp) {
+		kfree(args->tmpp);
+		args->tmpp = NULL;
+	}
+	while (args->to_free) {
+		struct tmpbuf *tb = args->to_free;
+		args->to_free = tb->next;
+		tb->release(tb->buf);
+		kfree(tb);
+	}
+}
+
+int
+nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, u32 *p, struct nfsd4_compoundargs *args)
+{
+	int status;
+
+	args->p = p;
+	args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len;
+	args->pagelist = rqstp->rq_arg.pages;
+	args->pagelen = rqstp->rq_arg.page_len;
+	args->tmpp = NULL;
+	args->to_free = NULL;
+	args->ops = args->iops;
+	args->rqstp = rqstp;
+
+	status = nfsd4_decode_compound(args);
+	if (status) {
+		nfsd4_release_compoundargs(args);
+	}
+	return !status;
+}
+
+int
+nfs4svc_encode_compoundres(struct svc_rqst *rqstp, u32 *p, struct nfsd4_compoundres *resp)
+{
+	/*
+	 * All that remains is to write the tag and operation count...
+	 */
+	struct kvec *iov;
+	p = resp->tagp;
+	*p++ = htonl(resp->taglen);
+	memcpy(p, resp->tag, resp->taglen);
+	p += XDR_QUADLEN(resp->taglen);
+	*p++ = htonl(resp->opcnt);
+
+	if (rqstp->rq_res.page_len) 
+		iov = &rqstp->rq_res.tail[0];
+	else
+		iov = &rqstp->rq_res.head[0];
+	iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base;
+	BUG_ON(iov->iov_len > PAGE_SIZE);
+	return 1;
+}
+
+/*
+ * Local variables:
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
new file mode 100644
index 000000000000..119e4d4495b8
--- /dev/null
+++ b/fs/nfsd/nfscache.c
@@ -0,0 +1,328 @@
+/*
+ * linux/fs/nfsd/nfscache.c
+ *
+ * Request reply cache. This is currently a global cache, but this may
+ * change in the future and be a per-client cache.
+ *
+ * This code is heavily inspired by the 44BSD implementation, although
+ * it does things a bit differently.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+
+/* Size of reply cache. Common values are:
+ * 4.3BSD:	128
+ * 4.4BSD:	256
+ * Solaris2:	1024
+ * DEC Unix:	512-4096
+ */
+#define CACHESIZE		1024
+#define HASHSIZE		64
+#define REQHASH(xid)		((((xid) >> 24) ^ (xid)) & (HASHSIZE-1))
+
+static struct hlist_head *	hash_list;
+static struct list_head 	lru_head;
+static int			cache_disabled = 1;
+
+static int	nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
+
+/* 
+ * locking for the reply cache:
+ * A cache entry is "single use" if c_state == RC_INPROG
+ * Otherwise, it when accessing _prev or _next, the lock must be held.
+ */
+static DEFINE_SPINLOCK(cache_lock);
+
+void
+nfsd_cache_init(void)
+{
+	struct svc_cacherep	*rp;
+	int			i;
+
+	INIT_LIST_HEAD(&lru_head);
+	i = CACHESIZE;
+	while(i) {
+		rp = kmalloc(sizeof(*rp), GFP_KERNEL);
+		if (!rp) break;
+		list_add(&rp->c_lru, &lru_head);
+		rp->c_state = RC_UNUSED;
+		rp->c_type = RC_NOCACHE;
+		INIT_HLIST_NODE(&rp->c_hash);
+		i--;
+	}
+
+	if (i)
+		printk (KERN_ERR "nfsd: cannot allocate all %d cache entries, only got %d\n",
+			CACHESIZE, CACHESIZE-i);
+
+	hash_list = kmalloc (HASHSIZE * sizeof(struct hlist_head), GFP_KERNEL);
+	if (!hash_list) {
+		nfsd_cache_shutdown();
+		printk (KERN_ERR "nfsd: cannot allocate %Zd bytes for hash list\n",
+			HASHSIZE * sizeof(struct hlist_head));
+		return;
+	}
+	memset(hash_list, 0, HASHSIZE * sizeof(struct hlist_head));
+
+	cache_disabled = 0;
+}
+
+void
+nfsd_cache_shutdown(void)
+{
+	struct svc_cacherep	*rp;
+
+	while (!list_empty(&lru_head)) {
+		rp = list_entry(lru_head.next, struct svc_cacherep, c_lru);
+		if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF)
+			kfree(rp->c_replvec.iov_base);
+		list_del(&rp->c_lru);
+		kfree(rp);
+	}
+
+	cache_disabled = 1;
+
+	if (hash_list)
+		kfree (hash_list);
+	hash_list = NULL;
+}
+
+/*
+ * Move cache entry to end of LRU list
+ */
+static void
+lru_put_end(struct svc_cacherep *rp)
+{
+	list_del(&rp->c_lru);
+	list_add_tail(&rp->c_lru, &lru_head);
+}
+
+/*
+ * Move a cache entry from one hash list to another
+ */
+static void
+hash_refile(struct svc_cacherep *rp)
+{
+	hlist_del_init(&rp->c_hash);
+	hlist_add_head(&rp->c_hash, hash_list + REQHASH(rp->c_xid));
+}
+
+/*
+ * Try to find an entry matching the current call in the cache. When none
+ * is found, we grab the oldest unlocked entry off the LRU list.
+ * Note that no operation within the loop may sleep.
+ */
+int
+nfsd_cache_lookup(struct svc_rqst *rqstp, int type)
+{
+	struct hlist_node	*hn;
+	struct hlist_head 	*rh;
+	struct svc_cacherep	*rp;
+	u32			xid = rqstp->rq_xid,
+				proto =  rqstp->rq_prot,
+				vers = rqstp->rq_vers,
+				proc = rqstp->rq_proc;
+	unsigned long		age;
+	int rtn;
+
+	rqstp->rq_cacherep = NULL;
+	if (cache_disabled || type == RC_NOCACHE) {
+		nfsdstats.rcnocache++;
+		return RC_DOIT;
+	}
+
+	spin_lock(&cache_lock);
+	rtn = RC_DOIT;
+
+	rh = &hash_list[REQHASH(xid)];
+	hlist_for_each_entry(rp, hn, rh, c_hash) {
+		if (rp->c_state != RC_UNUSED &&
+		    xid == rp->c_xid && proc == rp->c_proc &&
+		    proto == rp->c_prot && vers == rp->c_vers &&
+		    time_before(jiffies, rp->c_timestamp + 120*HZ) &&
+		    memcmp((char*)&rqstp->rq_addr, (char*)&rp->c_addr, sizeof(rp->c_addr))==0) {
+			nfsdstats.rchits++;
+			goto found_entry;
+		}
+	}
+	nfsdstats.rcmisses++;
+
+	/* This loop shouldn't take more than a few iterations normally */
+	{
+	int	safe = 0;
+	list_for_each_entry(rp, &lru_head, c_lru) {
+		if (rp->c_state != RC_INPROG)
+			break;
+		if (safe++ > CACHESIZE) {
+			printk("nfsd: loop in repcache LRU list\n");
+			cache_disabled = 1;
+			goto out;
+		}
+	}
+	}
+
+	/* This should not happen */
+	if (rp == NULL) {
+		static int	complaints;
+
+		printk(KERN_WARNING "nfsd: all repcache entries locked!\n");
+		if (++complaints > 5) {
+			printk(KERN_WARNING "nfsd: disabling repcache.\n");
+			cache_disabled = 1;
+		}
+		goto out;
+	}
+
+	rqstp->rq_cacherep = rp;
+	rp->c_state = RC_INPROG;
+	rp->c_xid = xid;
+	rp->c_proc = proc;
+	rp->c_addr = rqstp->rq_addr;
+	rp->c_prot = proto;
+	rp->c_vers = vers;
+	rp->c_timestamp = jiffies;
+
+	hash_refile(rp);
+
+	/* release any buffer */
+	if (rp->c_type == RC_REPLBUFF) {
+		kfree(rp->c_replvec.iov_base);
+		rp->c_replvec.iov_base = NULL;
+	}
+	rp->c_type = RC_NOCACHE;
+ out:
+	spin_unlock(&cache_lock);
+	return rtn;
+
+found_entry:
+	/* We found a matching entry which is either in progress or done. */
+	age = jiffies - rp->c_timestamp;
+	rp->c_timestamp = jiffies;
+	lru_put_end(rp);
+
+	rtn = RC_DROPIT;
+	/* Request being processed or excessive rexmits */
+	if (rp->c_state == RC_INPROG || age < RC_DELAY)
+		goto out;
+
+	/* From the hall of fame of impractical attacks:
+	 * Is this a user who tries to snoop on the cache? */
+	rtn = RC_DOIT;
+	if (!rqstp->rq_secure && rp->c_secure)
+		goto out;
+
+	/* Compose RPC reply header */
+	switch (rp->c_type) {
+	case RC_NOCACHE:
+		break;
+	case RC_REPLSTAT:
+		svc_putu32(&rqstp->rq_res.head[0], rp->c_replstat);
+		rtn = RC_REPLY;
+		break;
+	case RC_REPLBUFF:
+		if (!nfsd_cache_append(rqstp, &rp->c_replvec))
+			goto out;	/* should not happen */
+		rtn = RC_REPLY;
+		break;
+	default:
+		printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type);
+		rp->c_state = RC_UNUSED;
+	}
+
+	goto out;
+}
+
+/*
+ * Update a cache entry. This is called from nfsd_dispatch when
+ * the procedure has been executed and the complete reply is in
+ * rqstp->rq_res.
+ *
+ * We're copying around data here rather than swapping buffers because
+ * the toplevel loop requires max-sized buffers, which would be a waste
+ * of memory for a cache with a max reply size of 100 bytes (diropokres).
+ *
+ * If we should start to use different types of cache entries tailored
+ * specifically for attrstat and fh's, we may save even more space.
+ *
+ * Also note that a cachetype of RC_NOCACHE can legally be passed when
+ * nfsd failed to encode a reply that otherwise would have been cached.
+ * In this case, nfsd_cache_update is called with statp == NULL.
+ */
+void
+nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp)
+{
+	struct svc_cacherep *rp;
+	struct kvec	*resv = &rqstp->rq_res.head[0], *cachv;
+	int		len;
+
+	if (!(rp = rqstp->rq_cacherep) || cache_disabled)
+		return;
+
+	len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
+	len >>= 2;
+	
+	/* Don't cache excessive amounts of data and XDR failures */
+	if (!statp || len > (256 >> 2)) {
+		rp->c_state = RC_UNUSED;
+		return;
+	}
+
+	switch (cachetype) {
+	case RC_REPLSTAT:
+		if (len != 1)
+			printk("nfsd: RC_REPLSTAT/reply len %d!\n",len);
+		rp->c_replstat = *statp;
+		break;
+	case RC_REPLBUFF:
+		cachv = &rp->c_replvec;
+		cachv->iov_base = kmalloc(len << 2, GFP_KERNEL);
+		if (!cachv->iov_base) {
+			spin_lock(&cache_lock);
+			rp->c_state = RC_UNUSED;
+			spin_unlock(&cache_lock);
+			return;
+		}
+		cachv->iov_len = len << 2;
+		memcpy(cachv->iov_base, statp, len << 2);
+		break;
+	}
+	spin_lock(&cache_lock);
+	lru_put_end(rp);
+	rp->c_secure = rqstp->rq_secure;
+	rp->c_type = cachetype;
+	rp->c_state = RC_DONE;
+	rp->c_timestamp = jiffies;
+	spin_unlock(&cache_lock);
+	return;
+}
+
+/*
+ * Copy cached reply to current reply buffer. Should always fit.
+ * FIXME as reply is in a page, we should just attach the page, and
+ * keep a refcount....
+ */
+static int
+nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
+{
+	struct kvec	*vec = &rqstp->rq_res.head[0];
+
+	if (vec->iov_len + data->iov_len > PAGE_SIZE) {
+		printk(KERN_WARNING "nfsd: cached reply too large (%Zd).\n",
+				data->iov_len);
+		return 0;
+	}
+	memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len);
+	vec->iov_len += data->iov_len;
+	return 1;
+}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
new file mode 100644
index 000000000000..161afdcb8f7d
--- /dev/null
+++ b/fs/nfsd/nfsctl.c
@@ -0,0 +1,438 @@
+/*
+ * linux/fs/nfsd/nfsctl.c
+ *
+ * Syscall interface to knfsd.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/linkage.h>
+#include <linux/time.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/fcntl.h>
+#include <linux/net.h>
+#include <linux/in.h>
+#include <linux/syscalls.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/pagemap.h>
+#include <linux/init.h>
+
+#include <linux/nfs.h>
+#include <linux/nfsd_idmap.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfsd/xdr.h>
+#include <linux/nfsd/syscall.h>
+#include <linux/nfsd/interface.h>
+
+#include <asm/uaccess.h>
+
+/*
+ *	We have a single directory with 9 nodes in it.
+ */
+enum {
+	NFSD_Root = 1,
+	NFSD_Svc,
+	NFSD_Add,
+	NFSD_Del,
+	NFSD_Export,
+	NFSD_Unexport,
+	NFSD_Getfd,
+	NFSD_Getfs,
+	NFSD_List,
+	NFSD_Fh,
+	NFSD_Threads,
+	NFSD_Leasetime,
+};
+
+/*
+ * write() for these nodes.
+ */
+static ssize_t write_svc(struct file *file, char *buf, size_t size);
+static ssize_t write_add(struct file *file, char *buf, size_t size);
+static ssize_t write_del(struct file *file, char *buf, size_t size);
+static ssize_t write_export(struct file *file, char *buf, size_t size);
+static ssize_t write_unexport(struct file *file, char *buf, size_t size);
+static ssize_t write_getfd(struct file *file, char *buf, size_t size);
+static ssize_t write_getfs(struct file *file, char *buf, size_t size);
+static ssize_t write_filehandle(struct file *file, char *buf, size_t size);
+static ssize_t write_threads(struct file *file, char *buf, size_t size);
+static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
+
+static ssize_t (*write_op[])(struct file *, char *, size_t) = {
+	[NFSD_Svc] = write_svc,
+	[NFSD_Add] = write_add,
+	[NFSD_Del] = write_del,
+	[NFSD_Export] = write_export,
+	[NFSD_Unexport] = write_unexport,
+	[NFSD_Getfd] = write_getfd,
+	[NFSD_Getfs] = write_getfs,
+	[NFSD_Fh] = write_filehandle,
+	[NFSD_Threads] = write_threads,
+	[NFSD_Leasetime] = write_leasetime,
+};
+
+static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
+{
+	ino_t ino =  file->f_dentry->d_inode->i_ino;
+	char *data;
+	ssize_t rv;
+
+	if (ino >= sizeof(write_op)/sizeof(write_op[0]) || !write_op[ino])
+		return -EINVAL;
+
+	data = simple_transaction_get(file, buf, size);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	rv =  write_op[ino](file, data, size);
+	if (rv>0) {
+		simple_transaction_set(file, rv);
+		rv = size;
+	}
+	return rv;
+}
+
+static struct file_operations transaction_ops = {
+	.write		= nfsctl_transaction_write,
+	.read		= simple_transaction_read,
+	.release	= simple_transaction_release,
+};
+
+extern struct seq_operations nfs_exports_op;
+static int exports_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &nfs_exports_op);
+}
+
+static struct file_operations exports_operations = {
+	.open		= exports_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+/*----------------------------------------------------------------------------*/
+/*
+ * payload - write methods
+ * If the method has a response, the response should be put in buf,
+ * and the length returned.  Otherwise return 0 or and -error.
+ */
+
+static ssize_t write_svc(struct file *file, char *buf, size_t size)
+{
+	struct nfsctl_svc *data;
+	if (size < sizeof(*data))
+		return -EINVAL;
+	data = (struct nfsctl_svc*) buf;
+	return nfsd_svc(data->svc_port, data->svc_nthreads);
+}
+
+static ssize_t write_add(struct file *file, char *buf, size_t size)
+{
+	struct nfsctl_client *data;
+	if (size < sizeof(*data))
+		return -EINVAL;
+	data = (struct nfsctl_client *)buf;
+	return exp_addclient(data);
+}
+
+static ssize_t write_del(struct file *file, char *buf, size_t size)
+{
+	struct nfsctl_client *data;
+	if (size < sizeof(*data))
+		return -EINVAL;
+	data = (struct nfsctl_client *)buf;
+	return exp_delclient(data);
+}
+
+static ssize_t write_export(struct file *file, char *buf, size_t size)
+{
+	struct nfsctl_export *data;
+	if (size < sizeof(*data))
+		return -EINVAL;
+	data = (struct nfsctl_export*)buf;
+	return exp_export(data);
+}
+
+static ssize_t write_unexport(struct file *file, char *buf, size_t size)
+{
+	struct nfsctl_export *data;
+
+	if (size < sizeof(*data))
+		return -EINVAL;
+	data = (struct nfsctl_export*)buf;
+	return exp_unexport(data);
+}
+
+static ssize_t write_getfs(struct file *file, char *buf, size_t size)
+{
+	struct nfsctl_fsparm *data;
+	struct sockaddr_in *sin;
+	struct auth_domain *clp;
+	int err = 0;
+	struct knfsd_fh *res;
+
+	if (size < sizeof(*data))
+		return -EINVAL;
+	data = (struct nfsctl_fsparm*)buf;
+	err = -EPROTONOSUPPORT;
+	if (data->gd_addr.sa_family != AF_INET)
+		goto out;
+	sin = (struct sockaddr_in *)&data->gd_addr;
+	if (data->gd_maxlen > NFS3_FHSIZE)
+		data->gd_maxlen = NFS3_FHSIZE;
+
+	res = (struct knfsd_fh*)buf;
+
+	exp_readlock();
+	if (!(clp = auth_unix_lookup(sin->sin_addr)))
+		err = -EPERM;
+	else {
+		err = exp_rootfh(clp, data->gd_path, res, data->gd_maxlen);
+		auth_domain_put(clp);
+	}
+	exp_readunlock();
+	if (err == 0)
+		err = res->fh_size + (int)&((struct knfsd_fh*)0)->fh_base;
+ out:
+	return err;
+}
+
+static ssize_t write_getfd(struct file *file, char *buf, size_t size)
+{
+	struct nfsctl_fdparm *data;
+	struct sockaddr_in *sin;
+	struct auth_domain *clp;
+	int err = 0;
+	struct knfsd_fh fh;
+	char *res;
+
+	if (size < sizeof(*data))
+		return -EINVAL;
+	data = (struct nfsctl_fdparm*)buf;
+	err = -EPROTONOSUPPORT;
+	if (data->gd_addr.sa_family != AF_INET)
+		goto out;
+	err = -EINVAL;
+	if (data->gd_version < 2 || data->gd_version > NFSSVC_MAXVERS)
+		goto out;
+
+	res = buf;
+	sin = (struct sockaddr_in *)&data->gd_addr;
+	exp_readlock();
+	if (!(clp = auth_unix_lookup(sin->sin_addr)))
+		err = -EPERM;
+	else {
+		err = exp_rootfh(clp, data->gd_path, &fh, NFS_FHSIZE);
+		auth_domain_put(clp);
+	}
+	exp_readunlock();
+
+	if (err == 0) {
+		memset(res,0, NFS_FHSIZE);
+		memcpy(res, &fh.fh_base, fh.fh_size);
+		err = NFS_FHSIZE;
+	}
+ out:
+	return err;
+}
+
+static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
+{
+	/* request is:
+	 *   domain path maxsize
+	 * response is
+	 *   filehandle
+	 *
+	 * qword quoting is used, so filehandle will be \x....
+	 */
+	char *dname, *path;
+	int maxsize;
+	char *mesg = buf;
+	int len;
+	struct auth_domain *dom;
+	struct knfsd_fh fh;
+
+	if (buf[size-1] != '\n')
+		return -EINVAL;
+	buf[size-1] = 0;
+
+	dname = mesg;
+	len = qword_get(&mesg, dname, size);
+	if (len <= 0) return -EINVAL;
+	
+	path = dname+len+1;
+	len = qword_get(&mesg, path, size);
+	if (len <= 0) return -EINVAL;
+
+	len = get_int(&mesg, &maxsize);
+	if (len)
+		return len;
+
+	if (maxsize < NFS_FHSIZE)
+		return -EINVAL;
+	if (maxsize > NFS3_FHSIZE)
+		maxsize = NFS3_FHSIZE;
+
+	if (qword_get(&mesg, mesg, size)>0)
+		return -EINVAL;
+
+	/* we have all the words, they are in buf.. */
+	dom = unix_domain_find(dname);
+	if (!dom)
+		return -ENOMEM;
+
+	len = exp_rootfh(dom, path, &fh,  maxsize);
+	auth_domain_put(dom);
+	if (len)
+		return len;
+	
+	mesg = buf; len = SIMPLE_TRANSACTION_LIMIT;
+	qword_addhex(&mesg, &len, (char*)&fh.fh_base, fh.fh_size);
+	mesg[-1] = '\n';
+	return mesg - buf;	
+}
+
+extern int nfsd_nrthreads(void);
+
+static ssize_t write_threads(struct file *file, char *buf, size_t size)
+{
+	/* if size > 0, look for a number of threads and call nfsd_svc
+	 * then write out number of threads as reply
+	 */
+	char *mesg = buf;
+	int rv;
+	if (size > 0) {
+		int newthreads;
+		rv = get_int(&mesg, &newthreads);
+		if (rv)
+			return rv;
+		if (newthreads <0)
+			return -EINVAL;
+		rv = nfsd_svc(2049, newthreads);
+		if (rv)
+			return rv;
+	}
+	sprintf(buf, "%d\n", nfsd_nrthreads());
+	return strlen(buf);
+}
+
+extern time_t nfs4_leasetime(void);
+
+static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
+{
+	/* if size > 10 seconds, call
+	 * nfs4_reset_lease() then write out the new lease (seconds) as reply
+	 */
+	char *mesg = buf;
+	int rv;
+
+	if (size > 0) {
+		int lease;
+		rv = get_int(&mesg, &lease);
+		if (rv)
+			return rv;
+		if (lease < 10 || lease > 3600)
+			return -EINVAL;
+		nfs4_reset_lease(lease);
+	}
+	sprintf(buf, "%ld\n", nfs4_lease_time());
+	return strlen(buf);
+}
+
+/*----------------------------------------------------------------------------*/
+/*
+ *	populating the filesystem.
+ */
+
+static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
+{
+	static struct tree_descr nfsd_files[] = {
+		[NFSD_Svc] = {".svc", &transaction_ops, S_IWUSR},
+		[NFSD_Add] = {".add", &transaction_ops, S_IWUSR},
+		[NFSD_Del] = {".del", &transaction_ops, S_IWUSR},
+		[NFSD_Export] = {".export", &transaction_ops, S_IWUSR},
+		[NFSD_Unexport] = {".unexport", &transaction_ops, S_IWUSR},
+		[NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR},
+		[NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR},
+		[NFSD_List] = {"exports", &exports_operations, S_IRUGO},
+		[NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR},
+		[NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
+#ifdef CONFIG_NFSD_V4
+		[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
+#endif
+		/* last one */ {""}
+	};
+	return simple_fill_super(sb, 0x6e667364, nfsd_files);
+}
+
+static struct super_block *nfsd_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
+{
+	return get_sb_single(fs_type, flags, data, nfsd_fill_super);
+}
+
+static struct file_system_type nfsd_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "nfsd",
+	.get_sb		= nfsd_get_sb,
+	.kill_sb	= kill_litter_super,
+};
+
+static int __init init_nfsd(void)
+{
+	int retval;
+	printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
+
+	nfsd_stat_init();	/* Statistics */
+	nfsd_cache_init();	/* RPC reply cache */
+	nfsd_export_init();	/* Exports table */
+	nfsd_lockd_init();	/* lockd->nfsd callbacks */
+#ifdef CONFIG_NFSD_V4
+	nfsd_idmap_init();      /* Name to ID mapping */
+#endif /* CONFIG_NFSD_V4 */
+	if (proc_mkdir("fs/nfs", NULL)) {
+		struct proc_dir_entry *entry;
+		entry = create_proc_entry("fs/nfs/exports", 0, NULL);
+		if (entry)
+			entry->proc_fops =  &exports_operations;
+	}
+	retval = register_filesystem(&nfsd_fs_type);
+	if (retval) {
+		nfsd_export_shutdown();
+		nfsd_cache_shutdown();
+		remove_proc_entry("fs/nfs/exports", NULL);
+		remove_proc_entry("fs/nfs", NULL);
+		nfsd_stat_shutdown();
+		nfsd_lockd_shutdown();
+	}
+	return retval;
+}
+
+static void __exit exit_nfsd(void)
+{
+	nfsd_export_shutdown();
+	nfsd_cache_shutdown();
+	remove_proc_entry("fs/nfs/exports", NULL);
+	remove_proc_entry("fs/nfs", NULL);
+	nfsd_stat_shutdown();
+	nfsd_lockd_shutdown();
+#ifdef CONFIG_NFSD_V4
+	nfsd_idmap_shutdown();
+#endif /* CONFIG_NFSD_V4 */
+	unregister_filesystem(&nfsd_fs_type);
+}
+
+MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
+MODULE_LICENSE("GPL");
+module_init(init_nfsd)
+module_exit(exit_nfsd)
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
new file mode 100644
index 000000000000..7a3e397b4ed3
--- /dev/null
+++ b/fs/nfsd/nfsfh.c
@@ -0,0 +1,532 @@
+/*
+ * linux/fs/nfsd/nfsfh.c
+ *
+ * NFS server file handle treatment.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ * Portions Copyright (C) 1999 G. Allen Morris III <gam3@acm.org>
+ * Extensive rewrite by Neil Brown <neilb@cse.unsw.edu.au> Southern-Spring 1999
+ * ... and again Southern-Winter 2001 to support export_operations
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/fs.h>
+#include <linux/unistd.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/dcache.h>
+#include <linux/mount.h>
+#include <asm/pgtable.h>
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+
+#define NFSDDBG_FACILITY		NFSDDBG_FH
+#define NFSD_PARANOIA 1
+/* #define NFSD_DEBUG_VERBOSE 1 */
+
+
+static int nfsd_nr_verified;
+static int nfsd_nr_put;
+
+extern struct export_operations export_op_default;
+
+#define	CALL(ops,fun) ((ops->fun)?(ops->fun):export_op_default.fun)
+
+/*
+ * our acceptability function.
+ * if NOSUBTREECHECK, accept anything
+ * if not, require that we can walk up to exp->ex_dentry
+ * doing some checks on the 'x' bits
+ */
+static int nfsd_acceptable(void *expv, struct dentry *dentry)
+{
+	struct svc_export *exp = expv;
+	int rv;
+	struct dentry *tdentry;
+	struct dentry *parent;
+
+	if (exp->ex_flags & NFSEXP_NOSUBTREECHECK)
+		return 1;
+
+	tdentry = dget(dentry);
+	while (tdentry != exp->ex_dentry && ! IS_ROOT(tdentry)) {
+		/* make sure parents give x permission to user */
+		int err;
+		parent = dget_parent(tdentry);
+		err = permission(parent->d_inode, MAY_EXEC, NULL);
+		if (err < 0) {
+			dput(parent);
+			break;
+		}
+		dput(tdentry);
+		tdentry = parent;
+	}
+	if (tdentry != exp->ex_dentry)
+		dprintk("nfsd_acceptable failed at %p %s\n", tdentry, tdentry->d_name.name);
+	rv = (tdentry == exp->ex_dentry);
+	dput(tdentry);
+	return rv;
+}
+
+/* Type check. The correct error return for type mismatches does not seem to be
+ * generally agreed upon. SunOS seems to use EISDIR if file isn't S_IFREG; a
+ * comment in the NFSv3 spec says this is incorrect (implementation notes for
+ * the write call).
+ */
+static inline int
+nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int type)
+{
+	/* Type can be negative when creating hardlinks - not to a dir */
+	if (type > 0 && (mode & S_IFMT) != type) {
+		if (rqstp->rq_vers == 4 && (mode & S_IFMT) == S_IFLNK)
+			return nfserr_symlink;
+		else if (type == S_IFDIR)
+			return nfserr_notdir;
+		else if ((mode & S_IFMT) == S_IFDIR)
+			return nfserr_isdir;
+		else
+			return nfserr_inval;
+	}
+	if (type < 0 && (mode & S_IFMT) == -type) {
+		if (rqstp->rq_vers == 4 && (mode & S_IFMT) == S_IFLNK)
+			return nfserr_symlink;
+		else if (type == -S_IFDIR)
+			return nfserr_isdir;
+		else
+			return nfserr_notdir;
+	}
+	return 0;
+}
+
+/*
+ * Perform sanity checks on the dentry in a client's file handle.
+ *
+ * Note that the file handle dentry may need to be freed even after
+ * an error return.
+ *
+ * This is only called at the start of an nfsproc call, so fhp points to
+ * a svc_fh which is all 0 except for the over-the-wire file handle.
+ */
+u32
+fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
+{
+	struct knfsd_fh	*fh = &fhp->fh_handle;
+	struct svc_export *exp = NULL;
+	struct dentry	*dentry;
+	u32		error = 0;
+
+	dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp));
+
+	/* keep this filehandle for possible reference  when encoding attributes */
+	rqstp->rq_reffh = fh;
+
+	if (!fhp->fh_dentry) {
+		__u32 *datap=NULL;
+		__u32 tfh[3];		/* filehandle fragment for oldstyle filehandles */
+		int fileid_type;
+		int data_left = fh->fh_size/4;
+
+		error = nfserr_stale;
+		if (rqstp->rq_client == NULL)
+			goto out;
+		if (rqstp->rq_vers > 2)
+			error = nfserr_badhandle;
+		if (rqstp->rq_vers == 4 && fh->fh_size == 0)
+			return nfserr_nofilehandle;
+
+		if (fh->fh_version == 1) {
+			int len;
+			datap = fh->fh_auth;
+			if (--data_left<0) goto out;
+			switch (fh->fh_auth_type) {
+			case 0: break;
+			default: goto out;
+			}
+			len = key_len(fh->fh_fsid_type) / 4;
+			if (len == 0) goto out;
+			if  (fh->fh_fsid_type == 2) {
+				/* deprecated, convert to type 3 */
+				len = 3;
+				fh->fh_fsid_type = 3;
+				fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1])));
+				fh->fh_fsid[1] = fh->fh_fsid[2];
+			}
+			if ((data_left -= len)<0) goto out;
+			exp = exp_find(rqstp->rq_client, fh->fh_fsid_type, datap, &rqstp->rq_chandle);
+			datap += len;
+		} else {
+			dev_t xdev;
+			ino_t xino;
+			if (fh->fh_size != NFS_FHSIZE)
+				goto out;
+			/* assume old filehandle format */
+			xdev = old_decode_dev(fh->ofh_xdev);
+			xino = u32_to_ino_t(fh->ofh_xino);
+			mk_fsid_v0(tfh, xdev, xino);
+			exp = exp_find(rqstp->rq_client, 0, tfh, &rqstp->rq_chandle);
+		}
+
+		error = nfserr_dropit;
+		if (IS_ERR(exp) && PTR_ERR(exp) == -EAGAIN)
+			goto out;
+
+		error = nfserr_stale; 
+		if (!exp || IS_ERR(exp))
+			goto out;
+
+		/* Check if the request originated from a secure port. */
+		error = nfserr_perm;
+		if (!rqstp->rq_secure && EX_SECURE(exp)) {
+			printk(KERN_WARNING
+			       "nfsd: request from insecure port (%u.%u.%u.%u:%d)!\n",
+			       NIPQUAD(rqstp->rq_addr.sin_addr.s_addr),
+			       ntohs(rqstp->rq_addr.sin_port));
+			goto out;
+		}
+
+		/* Set user creds for this exportpoint */
+		error = nfsd_setuser(rqstp, exp);
+		if (error) {
+			error = nfserrno(error);
+			goto out;
+		}
+
+		/*
+		 * Look up the dentry using the NFS file handle.
+		 */
+		error = nfserr_stale;
+		if (rqstp->rq_vers > 2)
+			error = nfserr_badhandle;
+
+		if (fh->fh_version != 1) {
+			tfh[0] = fh->ofh_ino;
+			tfh[1] = fh->ofh_generation;
+			tfh[2] = fh->ofh_dirino;
+			datap = tfh;
+			data_left = 3;
+			if (fh->ofh_dirino == 0)
+				fileid_type = 1;
+			else
+				fileid_type = 2;
+		} else
+			fileid_type = fh->fh_fileid_type;
+		
+		if (fileid_type == 0)
+			dentry = dget(exp->ex_dentry);
+		else {
+			struct export_operations *nop = exp->ex_mnt->mnt_sb->s_export_op;
+			dentry = CALL(nop,decode_fh)(exp->ex_mnt->mnt_sb,
+						     datap, data_left,
+						     fileid_type,
+						     nfsd_acceptable, exp);
+		}
+		if (dentry == NULL)
+			goto out;
+		if (IS_ERR(dentry)) {
+			if (PTR_ERR(dentry) != -EINVAL)
+				error = nfserrno(PTR_ERR(dentry));
+			goto out;
+		}
+#ifdef NFSD_PARANOIA
+		if (S_ISDIR(dentry->d_inode->i_mode) &&
+		    (dentry->d_flags & DCACHE_DISCONNECTED)) {
+			printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
+			       dentry->d_parent->d_name.name, dentry->d_name.name);
+		}
+#endif
+
+		fhp->fh_dentry = dentry;
+		fhp->fh_export = exp;
+		nfsd_nr_verified++;
+	} else {
+		/* just rechecking permissions
+		 * (e.g. nfsproc_create calls fh_verify, then nfsd_create does as well)
+		 */
+		dprintk("nfsd: fh_verify - just checking\n");
+		dentry = fhp->fh_dentry;
+		exp = fhp->fh_export;
+	}
+	cache_get(&exp->h);
+
+	error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type);
+	if (error)
+		goto out;
+
+	/* Finally, check access permissions. */
+	error = nfsd_permission(exp, dentry, access);
+
+#ifdef NFSD_PARANOIA_EXTREME
+	if (error) {
+		printk("fh_verify: %s/%s permission failure, acc=%x, error=%d\n",
+		       dentry->d_parent->d_name.name, dentry->d_name.name, access, (error >> 24));
+	}
+#endif
+out:
+	if (exp && !IS_ERR(exp))
+		exp_put(exp);
+	if (error == nfserr_stale)
+		nfsdstats.fh_stale++;
+	return error;
+}
+
+
+/*
+ * Compose a file handle for an NFS reply.
+ *
+ * Note that when first composed, the dentry may not yet have
+ * an inode.  In this case a call to fh_update should be made
+ * before the fh goes out on the wire ...
+ */
+static inline int _fh_update(struct dentry *dentry, struct svc_export *exp,
+			     __u32 *datap, int *maxsize)
+{
+	struct export_operations *nop = exp->ex_mnt->mnt_sb->s_export_op;
+	
+	if (dentry == exp->ex_dentry) {
+		*maxsize = 0;
+		return 0;
+	}
+
+	return CALL(nop,encode_fh)(dentry, datap, maxsize,
+			  !(exp->ex_flags&NFSEXP_NOSUBTREECHECK));
+}
+
+/*
+ * for composing old style file handles
+ */
+static inline void _fh_update_old(struct dentry *dentry,
+				  struct svc_export *exp,
+				  struct knfsd_fh *fh)
+{
+	fh->ofh_ino = ino_t_to_u32(dentry->d_inode->i_ino);
+	fh->ofh_generation = dentry->d_inode->i_generation;
+	if (S_ISDIR(dentry->d_inode->i_mode) ||
+	    (exp->ex_flags & NFSEXP_NOSUBTREECHECK))
+		fh->ofh_dirino = 0;
+}
+
+int
+fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, struct svc_fh *ref_fh)
+{
+	/* ref_fh is a reference file handle.
+	 * if it is non-null, then we should compose a filehandle which is
+	 * of the same version, where possible.
+	 * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca
+	 * Then create a 32byte filehandle using nfs_fhbase_old
+	 *
+	 */
+
+	u8 ref_fh_version = 0;
+	u8 ref_fh_fsid_type = 0;
+	struct inode * inode = dentry->d_inode;
+	struct dentry *parent = dentry->d_parent;
+	__u32 *datap;
+	dev_t ex_dev = exp->ex_dentry->d_inode->i_sb->s_dev;
+
+	dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n",
+		MAJOR(ex_dev), MINOR(ex_dev),
+		(long) exp->ex_dentry->d_inode->i_ino,
+		parent->d_name.name, dentry->d_name.name,
+		(inode ? inode->i_ino : 0));
+
+	if (ref_fh) {
+		ref_fh_version = ref_fh->fh_handle.fh_version;
+		if (ref_fh_version == 0xca)
+			ref_fh_fsid_type = 0;
+		else
+			ref_fh_fsid_type = ref_fh->fh_handle.fh_fsid_type;
+		if (ref_fh_fsid_type > 3)
+			ref_fh_fsid_type = 0;
+
+		/* make sure ref_fh type works for given export */
+		if (ref_fh_fsid_type == 1 &&
+		    !(exp->ex_flags & NFSEXP_FSID)) {
+			/* if we don't have an fsid, we cannot provide one... */
+			ref_fh_fsid_type = 0;
+		}
+	} else if (exp->ex_flags & NFSEXP_FSID)
+		ref_fh_fsid_type = 1;
+
+	if (!old_valid_dev(ex_dev) && ref_fh_fsid_type == 0) {
+		/* for newer device numbers, we must use a newer fsid format */
+		ref_fh_version = 1;
+		ref_fh_fsid_type = 3;
+	}
+	if (old_valid_dev(ex_dev) &&
+	    (ref_fh_fsid_type == 2 || ref_fh_fsid_type == 3))
+		/* must use type1 for smaller device numbers */
+		ref_fh_fsid_type = 0;
+
+	if (ref_fh == fhp)
+		fh_put(ref_fh);
+
+	if (fhp->fh_locked || fhp->fh_dentry) {
+		printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n",
+			parent->d_name.name, dentry->d_name.name);
+	}
+	if (fhp->fh_maxsize < NFS_FHSIZE)
+		printk(KERN_ERR "fh_compose: called with maxsize %d! %s/%s\n",
+		       fhp->fh_maxsize, parent->d_name.name, dentry->d_name.name);
+
+	fhp->fh_dentry = dget(dentry); /* our internal copy */
+	fhp->fh_export = exp;
+	cache_get(&exp->h);
+
+	if (ref_fh_version == 0xca) {
+		/* old style filehandle please */
+		memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE);
+		fhp->fh_handle.fh_size = NFS_FHSIZE;
+		fhp->fh_handle.ofh_dcookie = 0xfeebbaca;
+		fhp->fh_handle.ofh_dev =  old_encode_dev(ex_dev);
+		fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev;
+		fhp->fh_handle.ofh_xino = ino_t_to_u32(exp->ex_dentry->d_inode->i_ino);
+		fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry));
+		if (inode)
+			_fh_update_old(dentry, exp, &fhp->fh_handle);
+	} else {
+		int len;
+		fhp->fh_handle.fh_version = 1;
+		fhp->fh_handle.fh_auth_type = 0;
+		datap = fhp->fh_handle.fh_auth+0;
+		fhp->fh_handle.fh_fsid_type = ref_fh_fsid_type;
+		switch (ref_fh_fsid_type) {
+			case 0:
+				/*
+				 * fsid_type 0:
+				 * 2byte major, 2byte minor, 4byte inode
+				 */
+				mk_fsid_v0(datap, ex_dev,
+					exp->ex_dentry->d_inode->i_ino);
+				break;
+			case 1:
+				/* fsid_type 1 == 4 bytes filesystem id */
+				mk_fsid_v1(datap, exp->ex_fsid);
+				break;
+			case 2:
+				/*
+				 * fsid_type 2:
+				 * 4byte major, 4byte minor, 4byte inode
+				 */
+				mk_fsid_v2(datap, ex_dev,
+					exp->ex_dentry->d_inode->i_ino);
+				break;
+			case 3:
+				/*
+				 * fsid_type 3:
+				 * 4byte devicenumber, 4byte inode
+				 */
+				mk_fsid_v3(datap, ex_dev,
+					exp->ex_dentry->d_inode->i_ino);
+				break;
+		}
+		len = key_len(ref_fh_fsid_type);
+		datap += len/4;
+		fhp->fh_handle.fh_size = 4 + len;
+
+		if (inode) {
+			int size = (fhp->fh_maxsize-len-4)/4;
+			fhp->fh_handle.fh_fileid_type =
+				_fh_update(dentry, exp, datap, &size);
+			fhp->fh_handle.fh_size += size*4;
+		}
+		if (fhp->fh_handle.fh_fileid_type == 255)
+			return nfserr_opnotsupp;
+	}
+
+	nfsd_nr_verified++;
+	return 0;
+}
+
+/*
+ * Update file handle information after changing a dentry.
+ * This is only called by nfsd_create, nfsd_create_v3 and nfsd_proc_create
+ */
+int
+fh_update(struct svc_fh *fhp)
+{
+	struct dentry *dentry;
+	__u32 *datap;
+	
+	if (!fhp->fh_dentry)
+		goto out_bad;
+
+	dentry = fhp->fh_dentry;
+	if (!dentry->d_inode)
+		goto out_negative;
+	if (fhp->fh_handle.fh_version != 1) {
+		_fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle);
+	} else {
+		int size;
+		if (fhp->fh_handle.fh_fileid_type != 0)
+			goto out_uptodate;
+		datap = fhp->fh_handle.fh_auth+
+			fhp->fh_handle.fh_size/4 -1;
+		size = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4;
+		fhp->fh_handle.fh_fileid_type =
+			_fh_update(dentry, fhp->fh_export, datap, &size);
+		fhp->fh_handle.fh_size += size*4;
+		if (fhp->fh_handle.fh_fileid_type == 255)
+			return nfserr_opnotsupp;
+	}
+out:
+	return 0;
+
+out_bad:
+	printk(KERN_ERR "fh_update: fh not verified!\n");
+	goto out;
+out_negative:
+	printk(KERN_ERR "fh_update: %s/%s still negative!\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name);
+	goto out;
+out_uptodate:
+	printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name);
+	goto out;
+}
+
+/*
+ * Release a file handle.
+ */
+void
+fh_put(struct svc_fh *fhp)
+{
+	struct dentry * dentry = fhp->fh_dentry;
+	struct svc_export * exp = fhp->fh_export;
+	if (dentry) {
+		fh_unlock(fhp);
+		fhp->fh_dentry = NULL;
+		dput(dentry);
+#ifdef CONFIG_NFSD_V3
+		fhp->fh_pre_saved = 0;
+		fhp->fh_post_saved = 0;
+#endif
+		nfsd_nr_put++;
+	}
+	if (exp) {
+		svc_export_put(&exp->h, &svc_export_cache);
+		fhp->fh_export = NULL;
+	}
+	return;
+}
+
+/*
+ * Shorthand for dprintk()'s
+ */
+char * SVCFH_fmt(struct svc_fh *fhp)
+{
+	struct knfsd_fh *fh = &fhp->fh_handle;
+
+	static char buf[80];
+	sprintf(buf, "%d: %08x %08x %08x %08x %08x %08x",
+		fh->fh_size,
+		fh->fh_base.fh_pad[0],
+		fh->fh_base.fh_pad[1],
+		fh->fh_base.fh_pad[2],
+		fh->fh_base.fh_pad[3],
+		fh->fh_base.fh_pad[4],
+		fh->fh_base.fh_pad[5]);
+	return buf;
+}
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
new file mode 100644
index 000000000000..757f9d208034
--- /dev/null
+++ b/fs/nfsd/nfsproc.c
@@ -0,0 +1,605 @@
+/*
+ * nfsproc2.c	Process version 2 NFS requests.
+ * linux/fs/nfsd/nfs2proc.c
+ * 
+ * Process version 2 NFS requests.
+ *
+ * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/linkage.h>
+#include <linux/time.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/net.h>
+#include <linux/in.h>
+#include <linux/namei.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfsd/xdr.h>
+
+typedef struct svc_rqst	svc_rqst;
+typedef struct svc_buf	svc_buf;
+
+#define NFSDDBG_FACILITY		NFSDDBG_PROC
+
+
+static int
+nfsd_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+	return nfs_ok;
+}
+
+/*
+ * Get a file's attributes
+ * N.B. After this call resp->fh needs an fh_put
+ */
+static int
+nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
+					  struct nfsd_attrstat *resp)
+{
+	dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
+
+	fh_copy(&resp->fh, &argp->fh);
+	return fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+}
+
+/*
+ * Set a file's attributes
+ * N.B. After this call resp->fh needs an fh_put
+ */
+static int
+nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
+					  struct nfsd_attrstat  *resp)
+{
+	dprintk("nfsd: SETATTR  %s, valid=%x, size=%ld\n",
+		SVCFH_fmt(&argp->fh),
+		argp->attrs.ia_valid, (long) argp->attrs.ia_size);
+
+	fh_copy(&resp->fh, &argp->fh);
+	return nfsd_setattr(rqstp, &resp->fh, &argp->attrs,0, (time_t)0);
+}
+
+/*
+ * Look up a path name component
+ * Note: the dentry in the resp->fh may be negative if the file
+ * doesn't exist yet.
+ * N.B. After this call resp->fh needs an fh_put
+ */
+static int
+nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
+					 struct nfsd_diropres  *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: LOOKUP   %s %.*s\n",
+		SVCFH_fmt(&argp->fh), argp->len, argp->name);
+
+	fh_init(&resp->fh, NFS_FHSIZE);
+	nfserr = nfsd_lookup(rqstp, &argp->fh, argp->name, argp->len,
+				 &resp->fh);
+
+	fh_put(&argp->fh);
+	return nfserr;
+}
+
+/*
+ * Read a symlink.
+ */
+static int
+nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_readlinkargs *argp,
+					   struct nfsd_readlinkres *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh));
+
+	/* Read the symlink. */
+	resp->len = NFS_MAXPATHLEN;
+	nfserr = nfsd_readlink(rqstp, &argp->fh, argp->buffer, &resp->len);
+
+	fh_put(&argp->fh);
+	return nfserr;
+}
+
+/*
+ * Read a portion of a file.
+ * N.B. After this call resp->fh needs an fh_put
+ */
+static int
+nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
+				       struct nfsd_readres  *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: READ    %s %d bytes at %d\n",
+		SVCFH_fmt(&argp->fh),
+		argp->count, argp->offset);
+
+	/* Obtain buffer pointer for payload. 19 is 1 word for
+	 * status, 17 words for fattr, and 1 word for the byte count.
+	 */
+
+	if (NFSSVC_MAXBLKSIZE < argp->count) {
+		printk(KERN_NOTICE
+			"oversized read request from %u.%u.%u.%u:%d (%d bytes)\n",
+				NIPQUAD(rqstp->rq_addr.sin_addr.s_addr),
+				ntohs(rqstp->rq_addr.sin_port),
+				argp->count);
+		argp->count = NFSSVC_MAXBLKSIZE;
+	}
+	svc_reserve(rqstp, (19<<2) + argp->count + 4);
+
+	resp->count = argp->count;
+	nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
+				  argp->offset,
+			   	  argp->vec, argp->vlen,
+				  &resp->count);
+
+	return nfserr;
+}
+
+/*
+ * Write data to a file
+ * N.B. After this call resp->fh needs an fh_put
+ */
+static int
+nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
+					struct nfsd_attrstat  *resp)
+{
+	int	nfserr;
+	int	stable = 1;
+
+	dprintk("nfsd: WRITE    %s %d bytes at %d\n",
+		SVCFH_fmt(&argp->fh),
+		argp->len, argp->offset);
+
+	nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
+				   argp->offset,
+				   argp->vec, argp->vlen,
+				   argp->len,
+				   &stable);
+	return nfserr;
+}
+
+/*
+ * CREATE processing is complicated. The keyword here is `overloaded.'
+ * The parent directory is kept locked between the check for existence
+ * and the actual create() call in compliance with VFS protocols.
+ * N.B. After this call _both_ argp->fh and resp->fh need an fh_put
+ */
+static int
+nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
+					 struct nfsd_diropres   *resp)
+{
+	svc_fh		*dirfhp = &argp->fh;
+	svc_fh		*newfhp = &resp->fh;
+	struct iattr	*attr = &argp->attrs;
+	struct inode	*inode;
+	struct dentry	*dchild;
+	int		nfserr, type, mode;
+	dev_t		rdev = 0, wanted = new_decode_dev(attr->ia_size);
+
+	dprintk("nfsd: CREATE   %s %.*s\n",
+		SVCFH_fmt(dirfhp), argp->len, argp->name);
+
+	/* First verify the parent file handle */
+	nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_EXEC);
+	if (nfserr)
+		goto done; /* must fh_put dirfhp even on error */
+
+	/* Check for MAY_WRITE in nfsd_create if necessary */
+
+	nfserr = nfserr_acces;
+	if (!argp->len)
+		goto done;
+	nfserr = nfserr_exist;
+	if (isdotent(argp->name, argp->len))
+		goto done;
+	fh_lock(dirfhp);
+	dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len);
+	if (IS_ERR(dchild)) {
+		nfserr = nfserrno(PTR_ERR(dchild));
+		goto out_unlock;
+	}
+	fh_init(newfhp, NFS_FHSIZE);
+	nfserr = fh_compose(newfhp, dirfhp->fh_export, dchild, dirfhp);
+	if (!nfserr && !dchild->d_inode)
+		nfserr = nfserr_noent;
+	dput(dchild);
+	if (nfserr) {
+		if (nfserr != nfserr_noent)
+			goto out_unlock;
+		/*
+		 * If the new file handle wasn't verified, we can't tell
+		 * whether the file exists or not. Time to bail ...
+		 */
+		nfserr = nfserr_acces;
+		if (!newfhp->fh_dentry) {
+			printk(KERN_WARNING 
+				"nfsd_proc_create: file handle not verified\n");
+			goto out_unlock;
+		}
+	}
+
+	inode = newfhp->fh_dentry->d_inode;
+
+	/* Unfudge the mode bits */
+	if (attr->ia_valid & ATTR_MODE) {
+		type = attr->ia_mode & S_IFMT;
+		mode = attr->ia_mode & ~S_IFMT;
+		if (!type) {
+			/* no type, so if target exists, assume same as that,
+			 * else assume a file */
+			if (inode) {
+				type = inode->i_mode & S_IFMT;
+				switch(type) {
+				case S_IFCHR:
+				case S_IFBLK:
+					/* reserve rdev for later checking */
+					rdev = inode->i_rdev;
+					attr->ia_valid |= ATTR_SIZE;
+
+					/* FALLTHROUGH */
+				case S_IFIFO:
+					/* this is probably a permission check..
+					 * at least IRIX implements perm checking on
+					 *   echo thing > device-special-file-or-pipe
+					 * by doing a CREATE with type==0
+					 */
+					nfserr = nfsd_permission(newfhp->fh_export,
+								 newfhp->fh_dentry,
+								 MAY_WRITE|MAY_LOCAL_ACCESS);
+					if (nfserr && nfserr != nfserr_rofs)
+						goto out_unlock;
+				}
+			} else
+				type = S_IFREG;
+		}
+	} else if (inode) {
+		type = inode->i_mode & S_IFMT;
+		mode = inode->i_mode & ~S_IFMT;
+	} else {
+		type = S_IFREG;
+		mode = 0;	/* ??? */
+	}
+
+	attr->ia_valid |= ATTR_MODE;
+	attr->ia_mode = mode;
+
+	/* Special treatment for non-regular files according to the
+	 * gospel of sun micro
+	 */
+	if (type != S_IFREG) {
+		int	is_borc = 0;
+		if (type != S_IFBLK && type != S_IFCHR) {
+			rdev = 0;
+		} else if (type == S_IFCHR && !(attr->ia_valid & ATTR_SIZE)) {
+			/* If you think you've seen the worst, grok this. */
+			type = S_IFIFO;
+		} else {
+			/* Okay, char or block special */
+			is_borc = 1;
+			if (!rdev)
+				rdev = wanted;
+		}
+
+		/* we've used the SIZE information, so discard it */
+		attr->ia_valid &= ~ATTR_SIZE;
+
+		/* Make sure the type and device matches */
+		nfserr = nfserr_exist;
+		if (inode && type != (inode->i_mode & S_IFMT))
+			goto out_unlock;
+	}
+
+	nfserr = 0;
+	if (!inode) {
+		/* File doesn't exist. Create it and set attrs */
+		nfserr = nfsd_create(rqstp, dirfhp, argp->name, argp->len,
+					attr, type, rdev, newfhp);
+	} else if (type == S_IFREG) {
+		dprintk("nfsd:   existing %s, valid=%x, size=%ld\n",
+			argp->name, attr->ia_valid, (long) attr->ia_size);
+		/* File already exists. We ignore all attributes except
+		 * size, so that creat() behaves exactly like
+		 * open(..., O_CREAT|O_TRUNC|O_WRONLY).
+		 */
+		attr->ia_valid &= ATTR_SIZE;
+		if (attr->ia_valid)
+			nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, (time_t)0);
+	}
+
+out_unlock:
+	/* We don't really need to unlock, as fh_put does it. */
+	fh_unlock(dirfhp);
+
+done:
+	fh_put(dirfhp);
+	return nfserr;
+}
+
+static int
+nfsd_proc_remove(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
+					 void		       *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: REMOVE   %s %.*s\n", SVCFH_fmt(&argp->fh),
+		argp->len, argp->name);
+
+	/* Unlink. -SIFDIR means file must not be a directory */
+	nfserr = nfsd_unlink(rqstp, &argp->fh, -S_IFDIR, argp->name, argp->len);
+	fh_put(&argp->fh);
+	return nfserr;
+}
+
+static int
+nfsd_proc_rename(struct svc_rqst *rqstp, struct nfsd_renameargs *argp,
+				  	 void		        *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: RENAME   %s %.*s -> \n",
+		SVCFH_fmt(&argp->ffh), argp->flen, argp->fname);
+	dprintk("nfsd:        ->  %s %.*s\n",
+		SVCFH_fmt(&argp->tfh), argp->tlen, argp->tname);
+
+	nfserr = nfsd_rename(rqstp, &argp->ffh, argp->fname, argp->flen,
+				    &argp->tfh, argp->tname, argp->tlen);
+	fh_put(&argp->ffh);
+	fh_put(&argp->tfh);
+	return nfserr;
+}
+
+static int
+nfsd_proc_link(struct svc_rqst *rqstp, struct nfsd_linkargs *argp,
+				void			    *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: LINK     %s ->\n",
+		SVCFH_fmt(&argp->ffh));
+	dprintk("nfsd:    %s %.*s\n",
+		SVCFH_fmt(&argp->tfh),
+		argp->tlen,
+		argp->tname);
+
+	nfserr = nfsd_link(rqstp, &argp->tfh, argp->tname, argp->tlen,
+				  &argp->ffh);
+	fh_put(&argp->ffh);
+	fh_put(&argp->tfh);
+	return nfserr;
+}
+
+static int
+nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
+				          void			  *resp)
+{
+	struct svc_fh	newfh;
+	int		nfserr;
+
+	dprintk("nfsd: SYMLINK  %s %.*s -> %.*s\n",
+		SVCFH_fmt(&argp->ffh), argp->flen, argp->fname,
+		argp->tlen, argp->tname);
+
+	fh_init(&newfh, NFS_FHSIZE);
+	/*
+	 * Create the link, look up new file and set attrs.
+	 */
+	nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
+						 argp->tname, argp->tlen,
+				 		 &newfh, &argp->attrs);
+
+
+	fh_put(&argp->ffh);
+	fh_put(&newfh);
+	return nfserr;
+}
+
+/*
+ * Make directory. This operation is not idempotent.
+ * N.B. After this call resp->fh needs an fh_put
+ */
+static int
+nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
+					struct nfsd_diropres   *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: MKDIR    %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
+
+	if (resp->fh.fh_dentry) {
+		printk(KERN_WARNING
+			"nfsd_proc_mkdir: response already verified??\n");
+	}
+
+	argp->attrs.ia_valid &= ~ATTR_SIZE;
+	fh_init(&resp->fh, NFS_FHSIZE);
+	nfserr = nfsd_create(rqstp, &argp->fh, argp->name, argp->len,
+				    &argp->attrs, S_IFDIR, 0, &resp->fh);
+	fh_put(&argp->fh);
+	return nfserr;
+}
+
+/*
+ * Remove a directory
+ */
+static int
+nfsd_proc_rmdir(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
+				 	void		      *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: RMDIR    %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
+
+	nfserr = nfsd_unlink(rqstp, &argp->fh, S_IFDIR, argp->name, argp->len);
+	fh_put(&argp->fh);
+	return nfserr;
+}
+
+/*
+ * Read a portion of a directory.
+ */
+static int
+nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp,
+					  struct nfsd_readdirres  *resp)
+{
+	int		nfserr, count;
+	loff_t		offset;
+
+	dprintk("nfsd: READDIR  %s %d bytes at %d\n",
+		SVCFH_fmt(&argp->fh),		
+		argp->count, argp->cookie);
+
+	/* Shrink to the client read size */
+	count = (argp->count >> 2) - 2;
+
+	/* Make sure we've room for the NULL ptr & eof flag */
+	count -= 2;
+	if (count < 0)
+		count = 0;
+
+	resp->buffer = argp->buffer;
+	resp->offset = NULL;
+	resp->buflen = count;
+	resp->common.err = nfs_ok;
+	/* Read directory and encode entries on the fly */
+	offset = argp->cookie;
+	nfserr = nfsd_readdir(rqstp, &argp->fh, &offset, 
+			      &resp->common, nfssvc_encode_entry);
+
+	resp->count = resp->buffer - argp->buffer;
+	if (resp->offset)
+		*resp->offset = htonl(offset);
+
+	fh_put(&argp->fh);
+	return nfserr;
+}
+
+/*
+ * Get file system info
+ */
+static int
+nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle   *argp,
+					  struct nfsd_statfsres *resp)
+{
+	int	nfserr;
+
+	dprintk("nfsd: STATFS   %s\n", SVCFH_fmt(&argp->fh));
+
+	nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats);
+	fh_put(&argp->fh);
+	return nfserr;
+}
+
+/*
+ * NFSv2 Server procedures.
+ * Only the results of non-idempotent operations are cached.
+ */
+#define nfsd_proc_none		NULL
+#define nfssvc_release_none	NULL
+struct nfsd_void { int dummy; };
+
+#define PROC(name, argt, rest, relt, cache, respsize)	\
+ { (svc_procfunc) nfsd_proc_##name,		\
+   (kxdrproc_t) nfssvc_decode_##argt,		\
+   (kxdrproc_t) nfssvc_encode_##rest,		\
+   (kxdrproc_t) nfssvc_release_##relt,		\
+   sizeof(struct nfsd_##argt),			\
+   sizeof(struct nfsd_##rest),			\
+   0,						\
+   cache,					\
+   respsize,				       	\
+ }
+
+#define ST 1		/* status */
+#define FH 8		/* filehandle */
+#define	AT 18		/* attributes */
+
+static struct svc_procedure		nfsd_procedures2[18] = {
+  PROC(null,	 void,		void,		none,		RC_NOCACHE, ST),
+  PROC(getattr,	 fhandle,	attrstat,	fhandle,	RC_NOCACHE, ST+AT),
+  PROC(setattr,  sattrargs,	attrstat,	fhandle,	RC_REPLBUFF, ST+AT),
+  PROC(none,	 void,		void,		none,		RC_NOCACHE, ST),
+  PROC(lookup,	 diropargs,	diropres,	fhandle,	RC_NOCACHE, ST+FH+AT),
+  PROC(readlink, readlinkargs,	readlinkres,	none,		RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4),
+  PROC(read,	 readargs,	readres,	fhandle,	RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE),
+  PROC(none,	 void,		void,		none,		RC_NOCACHE, ST),
+  PROC(write,	 writeargs,	attrstat,	fhandle,	RC_REPLBUFF, ST+AT),
+  PROC(create,	 createargs,	diropres,	fhandle,	RC_REPLBUFF, ST+FH+AT),
+  PROC(remove,	 diropargs,	void,		none,		RC_REPLSTAT, ST),
+  PROC(rename,	 renameargs,	void,		none,		RC_REPLSTAT, ST),
+  PROC(link,	 linkargs,	void,		none,		RC_REPLSTAT, ST),
+  PROC(symlink,	 symlinkargs,	void,		none,		RC_REPLSTAT, ST),
+  PROC(mkdir,	 createargs,	diropres,	fhandle,	RC_REPLBUFF, ST+FH+AT),
+  PROC(rmdir,	 diropargs,	void,		none,		RC_REPLSTAT, ST),
+  PROC(readdir,	 readdirargs,	readdirres,	none,		RC_NOCACHE, 0),
+  PROC(statfs,	 fhandle,	statfsres,	none,		RC_NOCACHE, ST+5),
+};
+
+
+struct svc_version	nfsd_version2 = {
+		.vs_vers	= 2,
+		.vs_nproc	= 18,
+		.vs_proc	= nfsd_procedures2,
+		.vs_dispatch	= nfsd_dispatch,
+		.vs_xdrsize	= NFS2_SVC_XDRSIZE,
+};
+
+/*
+ * Map errnos to NFS errnos.
+ */
+int
+nfserrno (int errno)
+{
+	static struct {
+		int	nfserr;
+		int	syserr;
+	} nfs_errtbl[] = {
+		{ nfs_ok, 0 },
+		{ nfserr_perm, -EPERM },
+		{ nfserr_noent, -ENOENT },
+		{ nfserr_io, -EIO },
+		{ nfserr_nxio, -ENXIO },
+		{ nfserr_acces, -EACCES },
+		{ nfserr_exist, -EEXIST },
+		{ nfserr_xdev, -EXDEV },
+		{ nfserr_mlink, -EMLINK },
+		{ nfserr_nodev, -ENODEV },
+		{ nfserr_notdir, -ENOTDIR },
+		{ nfserr_isdir, -EISDIR },
+		{ nfserr_inval, -EINVAL },
+		{ nfserr_fbig, -EFBIG },
+		{ nfserr_nospc, -ENOSPC },
+		{ nfserr_rofs, -EROFS },
+		{ nfserr_mlink, -EMLINK },
+		{ nfserr_nametoolong, -ENAMETOOLONG },
+		{ nfserr_notempty, -ENOTEMPTY },
+#ifdef EDQUOT
+		{ nfserr_dquot, -EDQUOT },
+#endif
+		{ nfserr_stale, -ESTALE },
+		{ nfserr_jukebox, -ETIMEDOUT },
+		{ nfserr_dropit, -EAGAIN },
+		{ nfserr_dropit, -ENOMEM },
+		{ nfserr_badname, -ESRCH },
+		{ nfserr_io, -ETXTBSY },
+		{ -1, -EIO }
+	};
+	int	i;
+
+	for (i = 0; nfs_errtbl[i].nfserr != -1; i++) {
+		if (nfs_errtbl[i].syserr == errno)
+			return nfs_errtbl[i].nfserr;
+	}
+	printk (KERN_INFO "nfsd: non-standard errno: %d\n", errno);
+	return nfserr_io;
+}
+
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
new file mode 100644
index 000000000000..39551657e656
--- /dev/null
+++ b/fs/nfsd/nfssvc.c
@@ -0,0 +1,385 @@
+/*
+ * linux/fs/nfsd/nfssvc.c
+ *
+ * Central processing for nfsd.
+ *
+ * Authors:	Olaf Kirch (okir@monad.swb.de)
+ *
+ * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/time.h>
+#include <linux/errno.h>
+#include <linux/nfs.h>
+#include <linux/in.h>
+#include <linux/uio.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/fs_struct.h>
+
+#include <linux/sunrpc/types.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/cache.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/stats.h>
+#include <linux/nfsd/cache.h>
+#include <linux/lockd/bind.h>
+
+#define NFSDDBG_FACILITY	NFSDDBG_SVC
+
+/* these signals will be delivered to an nfsd thread 
+ * when handling a request
+ */
+#define ALLOWED_SIGS	(sigmask(SIGKILL))
+/* these signals will be delivered to an nfsd thread
+ * when not handling a request. i.e. when waiting
+ */
+#define SHUTDOWN_SIGS	(sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT))
+/* if the last thread dies with SIGHUP, then the exports table is
+ * left unchanged ( like 2.4-{0-9} ).  Any other signal will clear
+ * the exports table (like 2.2).
+ */
+#define	SIG_NOCLEAN	SIGHUP
+
+extern struct svc_program	nfsd_program;
+static void			nfsd(struct svc_rqst *rqstp);
+struct timeval			nfssvc_boot;
+static struct svc_serv 		*nfsd_serv;
+static atomic_t			nfsd_busy;
+static unsigned long		nfsd_last_call;
+static DEFINE_SPINLOCK(nfsd_call_lock);
+
+struct nfsd_list {
+	struct list_head 	list;
+	struct task_struct	*task;
+};
+static struct list_head nfsd_list = LIST_HEAD_INIT(nfsd_list);
+
+/*
+ * Maximum number of nfsd processes
+ */
+#define	NFSD_MAXSERVS		8192
+
+int nfsd_nrthreads(void)
+{
+	if (nfsd_serv == NULL)
+		return 0;
+	else
+		return nfsd_serv->sv_nrthreads;
+}
+
+int
+nfsd_svc(unsigned short port, int nrservs)
+{
+	int	error;
+	int	none_left;	
+	struct list_head *victim;
+	
+	lock_kernel();
+	dprintk("nfsd: creating service\n");
+	error = -EINVAL;
+	if (nrservs <= 0)
+		nrservs = 0;
+	if (nrservs > NFSD_MAXSERVS)
+		nrservs = NFSD_MAXSERVS;
+	
+	/* Readahead param cache - will no-op if it already exists */
+	error =	nfsd_racache_init(2*nrservs);
+	if (error<0)
+		goto out;
+	error = nfs4_state_init();
+	if (error<0)
+		goto out;
+	if (!nfsd_serv) {
+		atomic_set(&nfsd_busy, 0);
+		error = -ENOMEM;
+		nfsd_serv = svc_create(&nfsd_program, NFSD_BUFSIZE);
+		if (nfsd_serv == NULL)
+			goto out;
+		error = svc_makesock(nfsd_serv, IPPROTO_UDP, port);
+		if (error < 0)
+			goto failure;
+
+#ifdef CONFIG_NFSD_TCP
+		error = svc_makesock(nfsd_serv, IPPROTO_TCP, port);
+		if (error < 0)
+			goto failure;
+#endif
+		do_gettimeofday(&nfssvc_boot);		/* record boot time */
+	} else
+		nfsd_serv->sv_nrthreads++;
+	nrservs -= (nfsd_serv->sv_nrthreads-1);
+	while (nrservs > 0) {
+		nrservs--;
+		__module_get(THIS_MODULE);
+		error = svc_create_thread(nfsd, nfsd_serv);
+		if (error < 0) {
+			module_put(THIS_MODULE);
+			break;
+		}
+	}
+	victim = nfsd_list.next;
+	while (nrservs < 0 && victim != &nfsd_list) {
+		struct nfsd_list *nl =
+			list_entry(victim,struct nfsd_list, list);
+		victim = victim->next;
+		send_sig(SIG_NOCLEAN, nl->task, 1);
+		nrservs++;
+	}
+ failure:
+	none_left = (nfsd_serv->sv_nrthreads == 1);
+	svc_destroy(nfsd_serv);		/* Release server */
+	if (none_left) {
+		nfsd_serv = NULL;
+		nfsd_racache_shutdown();
+		nfs4_state_shutdown();
+	}
+ out:
+	unlock_kernel();
+	return error;
+}
+
+static inline void
+update_thread_usage(int busy_threads)
+{
+	unsigned long prev_call;
+	unsigned long diff;
+	int decile;
+
+	spin_lock(&nfsd_call_lock);
+	prev_call = nfsd_last_call;
+	nfsd_last_call = jiffies;
+	decile = busy_threads*10/nfsdstats.th_cnt;
+	if (decile>0 && decile <= 10) {
+		diff = nfsd_last_call - prev_call;
+		if ( (nfsdstats.th_usage[decile-1] += diff) >= NFSD_USAGE_WRAP)
+			nfsdstats.th_usage[decile-1] -= NFSD_USAGE_WRAP;
+		if (decile == 10)
+			nfsdstats.th_fullcnt++;
+	}
+	spin_unlock(&nfsd_call_lock);
+}
+
+/*
+ * This is the NFS server kernel thread
+ */
+static void
+nfsd(struct svc_rqst *rqstp)
+{
+	struct svc_serv	*serv = rqstp->rq_server;
+	struct fs_struct *fsp;
+	int		err;
+	struct nfsd_list me;
+	sigset_t shutdown_mask, allowed_mask;
+
+	/* Lock module and set up kernel thread */
+	lock_kernel();
+	daemonize("nfsd");
+
+	/* After daemonize() this kernel thread shares current->fs
+	 * with the init process. We need to create files with a
+	 * umask of 0 instead of init's umask. */
+	fsp = copy_fs_struct(current->fs);
+	if (!fsp) {
+		printk("Unable to start nfsd thread: out of memory\n");
+		goto out;
+	}
+	exit_fs(current);
+	current->fs = fsp;
+	current->fs->umask = 0;
+
+	siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS);
+	siginitsetinv(&allowed_mask, ALLOWED_SIGS);
+
+	nfsdstats.th_cnt++;
+
+	lockd_up();				/* start lockd */
+
+	me.task = current;
+	list_add(&me.list, &nfsd_list);
+
+	unlock_kernel();
+
+	/*
+	 * We want less throttling in balance_dirty_pages() so that nfs to
+	 * localhost doesn't cause nfsd to lock up due to all the client's
+	 * dirty pages.
+	 */
+	current->flags |= PF_LESS_THROTTLE;
+
+	/*
+	 * The main request loop
+	 */
+	for (;;) {
+		/* Block all but the shutdown signals */
+		sigprocmask(SIG_SETMASK, &shutdown_mask, NULL);
+
+		/*
+		 * Find a socket with data available and call its
+		 * recvfrom routine.
+		 */
+		while ((err = svc_recv(serv, rqstp,
+				       60*60*HZ)) == -EAGAIN)
+			;
+		if (err < 0)
+			break;
+		update_thread_usage(atomic_read(&nfsd_busy));
+		atomic_inc(&nfsd_busy);
+
+		/* Lock the export hash tables for reading. */
+		exp_readlock();
+
+		/* Process request with signals blocked.  */
+		sigprocmask(SIG_SETMASK, &allowed_mask, NULL);
+
+		svc_process(serv, rqstp);
+
+		/* Unlock export hash tables */
+		exp_readunlock();
+		update_thread_usage(atomic_read(&nfsd_busy));
+		atomic_dec(&nfsd_busy);
+	}
+
+	if (err != -EINTR) {
+		printk(KERN_WARNING "nfsd: terminating on error %d\n", -err);
+	} else {
+		unsigned int	signo;
+
+		for (signo = 1; signo <= _NSIG; signo++)
+			if (sigismember(&current->pending.signal, signo) &&
+			    !sigismember(&current->blocked, signo))
+				break;
+		err = signo;
+	}
+
+	lock_kernel();
+
+	/* Release lockd */
+	lockd_down();
+
+	/* Check if this is last thread */
+	if (serv->sv_nrthreads==1) {
+		
+		printk(KERN_WARNING "nfsd: last server has exited\n");
+		if (err != SIG_NOCLEAN) {
+			printk(KERN_WARNING "nfsd: unexporting all filesystems\n");
+			nfsd_export_flush();
+		}
+		nfsd_serv = NULL;
+	        nfsd_racache_shutdown();	/* release read-ahead cache */
+		nfs4_state_shutdown();
+	}
+	list_del(&me.list);
+	nfsdstats.th_cnt --;
+
+out:
+	/* Release the thread */
+	svc_exit_thread(rqstp);
+
+	/* Release module */
+	module_put_and_exit(0);
+}
+
+int
+nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp)
+{
+	struct svc_procedure	*proc;
+	kxdrproc_t		xdr;
+	u32			nfserr;
+	u32			*nfserrp;
+
+	dprintk("nfsd_dispatch: vers %d proc %d\n",
+				rqstp->rq_vers, rqstp->rq_proc);
+	proc = rqstp->rq_procinfo;
+
+	/* Check whether we have this call in the cache. */
+	switch (nfsd_cache_lookup(rqstp, proc->pc_cachetype)) {
+	case RC_INTR:
+	case RC_DROPIT:
+		return 0;
+	case RC_REPLY:
+		return 1;
+	case RC_DOIT:;
+		/* do it */
+	}
+
+	/* Decode arguments */
+	xdr = proc->pc_decode;
+	if (xdr && !xdr(rqstp, (u32*)rqstp->rq_arg.head[0].iov_base,
+			rqstp->rq_argp)) {
+		dprintk("nfsd: failed to decode arguments!\n");
+		nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+		*statp = rpc_garbage_args;
+		return 1;
+	}
+
+	/* need to grab the location to store the status, as
+	 * nfsv4 does some encoding while processing 
+	 */
+	nfserrp = rqstp->rq_res.head[0].iov_base
+		+ rqstp->rq_res.head[0].iov_len;
+	rqstp->rq_res.head[0].iov_len += sizeof(u32);
+
+	/* Now call the procedure handler, and encode NFS status. */
+	nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
+	if (nfserr == nfserr_jukebox && rqstp->rq_vers == 2)
+		nfserr = nfserr_dropit;
+	if (nfserr == nfserr_dropit) {
+		dprintk("nfsd: Dropping request due to malloc failure!\n");
+		nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+		return 0;
+	}
+
+	if (rqstp->rq_proc != 0)
+		*nfserrp++ = nfserr;
+
+	/* Encode result.
+	 * For NFSv2, additional info is never returned in case of an error.
+	 */
+	if (!(nfserr && rqstp->rq_vers == 2)) {
+		xdr = proc->pc_encode;
+		if (xdr && !xdr(rqstp, nfserrp,
+				rqstp->rq_resp)) {
+			/* Failed to encode result. Release cache entry */
+			dprintk("nfsd: failed to encode result!\n");
+			nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+			*statp = rpc_system_err;
+			return 1;
+		}
+	}
+
+	/* Store reply in cache. */
+	nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1);
+	return 1;
+}
+
+extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4;
+
+static struct svc_version *	nfsd_version[] = {
+	[2] = &nfsd_version2,
+#if defined(CONFIG_NFSD_V3)
+	[3] = &nfsd_version3,
+#endif
+#if defined(CONFIG_NFSD_V4)
+	[4] = &nfsd_version4,
+#endif
+};
+
+#define NFSD_NRVERS		(sizeof(nfsd_version)/sizeof(nfsd_version[0]))
+struct svc_program		nfsd_program = {
+	.pg_prog		= NFS_PROGRAM,		/* program number */
+	.pg_nvers		= NFSD_NRVERS,		/* nr of entries in nfsd_version */
+	.pg_vers		= nfsd_version,		/* version table */
+	.pg_name		= "nfsd",		/* program name */
+	.pg_class		= "nfsd",		/* authentication class */
+	.pg_stats		= &nfsd_svcstats,	/* version table */
+	.pg_authenticate	= &svc_set_client,	/* export authentication */
+
+};
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
new file mode 100644
index 000000000000..948b08287c99
--- /dev/null
+++ b/fs/nfsd/nfsxdr.c
@@ -0,0 +1,511 @@
+/*
+ * linux/fs/nfsd/xdr.c
+ *
+ * XDR support for nfsd
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/types.h>
+#include <linux/time.h>
+#include <linux/nfs.h>
+#include <linux/vfs.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/xdr.h>
+#include <linux/mm.h>
+
+#define NFSDDBG_FACILITY		NFSDDBG_XDR
+
+
+#ifdef NFSD_OPTIMIZE_SPACE
+# define inline
+#endif
+
+/*
+ * Mapping of S_IF* types to NFS file types
+ */
+static u32	nfs_ftypes[] = {
+	NFNON,  NFCHR,  NFCHR, NFBAD,
+	NFDIR,  NFBAD,  NFBLK, NFBAD,
+	NFREG,  NFBAD,  NFLNK, NFBAD,
+	NFSOCK, NFBAD,  NFLNK, NFBAD,
+};
+
+
+/*
+ * XDR functions for basic NFS types
+ */
+static inline u32 *
+decode_fh(u32 *p, struct svc_fh *fhp)
+{
+	fh_init(fhp, NFS_FHSIZE);
+	memcpy(&fhp->fh_handle.fh_base, p, NFS_FHSIZE);
+	fhp->fh_handle.fh_size = NFS_FHSIZE;
+
+	/* FIXME: Look up export pointer here and verify
+	 * Sun Secure RPC if requested */
+	return p + (NFS_FHSIZE >> 2);
+}
+
+static inline u32 *
+encode_fh(u32 *p, struct svc_fh *fhp)
+{
+	memcpy(p, &fhp->fh_handle.fh_base, NFS_FHSIZE);
+	return p + (NFS_FHSIZE>> 2);
+}
+
+/*
+ * Decode a file name and make sure that the path contains
+ * no slashes or null bytes.
+ */
+static inline u32 *
+decode_filename(u32 *p, char **namp, int *lenp)
+{
+	char		*name;
+	int		i;
+
+	if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXNAMLEN)) != NULL) {
+		for (i = 0, name = *namp; i < *lenp; i++, name++) {
+			if (*name == '\0' || *name == '/')
+				return NULL;
+		}
+	}
+
+	return p;
+}
+
+static inline u32 *
+decode_pathname(u32 *p, char **namp, int *lenp)
+{
+	char		*name;
+	int		i;
+
+	if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) {
+		for (i = 0, name = *namp; i < *lenp; i++, name++) {
+			if (*name == '\0')
+				return NULL;
+		}
+	}
+
+	return p;
+}
+
+static inline u32 *
+decode_sattr(u32 *p, struct iattr *iap)
+{
+	u32	tmp, tmp1;
+
+	iap->ia_valid = 0;
+
+	/* Sun client bug compatibility check: some sun clients seem to
+	 * put 0xffff in the mode field when they mean 0xffffffff.
+	 * Quoting the 4.4BSD nfs server code: Nah nah nah nah na nah.
+	 */
+	if ((tmp = ntohl(*p++)) != (u32)-1 && tmp != 0xffff) {
+		iap->ia_valid |= ATTR_MODE;
+		iap->ia_mode = tmp;
+	}
+	if ((tmp = ntohl(*p++)) != (u32)-1) {
+		iap->ia_valid |= ATTR_UID;
+		iap->ia_uid = tmp;
+	}
+	if ((tmp = ntohl(*p++)) != (u32)-1) {
+		iap->ia_valid |= ATTR_GID;
+		iap->ia_gid = tmp;
+	}
+	if ((tmp = ntohl(*p++)) != (u32)-1) {
+		iap->ia_valid |= ATTR_SIZE;
+		iap->ia_size = tmp;
+	}
+	tmp  = ntohl(*p++); tmp1 = ntohl(*p++);
+	if (tmp != (u32)-1 && tmp1 != (u32)-1) {
+		iap->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
+		iap->ia_atime.tv_sec = tmp;
+		iap->ia_atime.tv_nsec = tmp1 * 1000; 
+	}
+	tmp  = ntohl(*p++); tmp1 = ntohl(*p++);
+	if (tmp != (u32)-1 && tmp1 != (u32)-1) {
+		iap->ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
+		iap->ia_mtime.tv_sec = tmp;
+		iap->ia_mtime.tv_nsec = tmp1 * 1000; 
+		/*
+		 * Passing the invalid value useconds=1000000 for mtime
+		 * is a Sun convention for "set both mtime and atime to
+		 * current server time".  It's needed to make permissions
+		 * checks for the "touch" program across v2 mounts to
+		 * Solaris and Irix boxes work correctly. See description of
+		 * sattr in section 6.1 of "NFS Illustrated" by
+		 * Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5
+		 */
+		if (tmp1 == 1000000)
+			iap->ia_valid &= ~(ATTR_ATIME_SET|ATTR_MTIME_SET);
+	}
+	return p;
+}
+
+static inline u32 *
+encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+{
+	struct vfsmount *mnt = fhp->fh_export->ex_mnt;
+	struct dentry	*dentry = fhp->fh_dentry;
+	struct kstat stat;
+	int type;
+	struct timespec time;
+
+	vfs_getattr(mnt, dentry, &stat);
+	type = (stat.mode & S_IFMT);
+
+	*p++ = htonl(nfs_ftypes[type >> 12]);
+	*p++ = htonl((u32) stat.mode);
+	*p++ = htonl((u32) stat.nlink);
+	*p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid));
+	*p++ = htonl((u32) nfsd_rgid(rqstp, stat.gid));
+
+	if (S_ISLNK(type) && stat.size > NFS_MAXPATHLEN) {
+		*p++ = htonl(NFS_MAXPATHLEN);
+	} else {
+		*p++ = htonl((u32) stat.size);
+	}
+	*p++ = htonl((u32) stat.blksize);
+	if (S_ISCHR(type) || S_ISBLK(type))
+		*p++ = htonl(new_encode_dev(stat.rdev));
+	else
+		*p++ = htonl(0xffffffff);
+	*p++ = htonl((u32) stat.blocks);
+	if (is_fsid(fhp, rqstp->rq_reffh))
+		*p++ = htonl((u32) fhp->fh_export->ex_fsid);
+	else
+		*p++ = htonl(new_encode_dev(stat.dev));
+	*p++ = htonl((u32) stat.ino);
+	*p++ = htonl((u32) stat.atime.tv_sec);
+	*p++ = htonl(stat.atime.tv_nsec ? stat.atime.tv_nsec / 1000 : 0);
+	lease_get_mtime(dentry->d_inode, &time); 
+	*p++ = htonl((u32) time.tv_sec);
+	*p++ = htonl(time.tv_nsec ? time.tv_nsec / 1000 : 0); 
+	*p++ = htonl((u32) stat.ctime.tv_sec);
+	*p++ = htonl(stat.ctime.tv_nsec ? stat.ctime.tv_nsec / 1000 : 0);
+
+	return p;
+}
+
+
+/*
+ * XDR decode functions
+ */
+int
+nfssvc_decode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)
+{
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfssvc_decode_fhandle(struct svc_rqst *rqstp, u32 *p, struct nfsd_fhandle *args)
+{
+	if (!(p = decode_fh(p, &args->fh)))
+		return 0;
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfssvc_decode_sattrargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd_sattrargs *args)
+{
+	if (!(p = decode_fh(p, &args->fh))
+	 || !(p = decode_sattr(p, &args->attrs)))
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfssvc_decode_diropargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd_diropargs *args)
+{
+	if (!(p = decode_fh(p, &args->fh))
+	 || !(p = decode_filename(p, &args->name, &args->len)))
+		return 0;
+
+	 return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd_readargs *args)
+{
+	unsigned int len;
+	int v,pn;
+	if (!(p = decode_fh(p, &args->fh)))
+		return 0;
+
+	args->offset    = ntohl(*p++);
+	len = args->count     = ntohl(*p++);
+	p++; /* totalcount - unused */
+
+	if (len > NFSSVC_MAXBLKSIZE)
+		len = NFSSVC_MAXBLKSIZE;
+
+	/* set up somewhere to store response.
+	 * We take pages, put them on reslist and include in iovec
+	 */
+	v=0;
+	while (len > 0) {
+		pn=rqstp->rq_resused;
+		svc_take_page(rqstp);
+		args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+		args->vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE;
+		len -= args->vec[v].iov_len;
+		v++;
+	}
+	args->vlen = v;
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd_writeargs *args)
+{
+	unsigned int len;
+	int v;
+	if (!(p = decode_fh(p, &args->fh)))
+		return 0;
+
+	p++;				/* beginoffset */
+	args->offset = ntohl(*p++);	/* offset */
+	p++;				/* totalcount */
+	len = args->len = ntohl(*p++);
+	args->vec[0].iov_base = (void*)p;
+	args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len -
+				(((void*)p) - rqstp->rq_arg.head[0].iov_base);
+	if (len > NFSSVC_MAXBLKSIZE)
+		len = NFSSVC_MAXBLKSIZE;
+	v = 0;
+	while (len > args->vec[v].iov_len) {
+		len -= args->vec[v].iov_len;
+		v++;
+		args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]);
+		args->vec[v].iov_len = PAGE_SIZE;
+	}
+	args->vec[v].iov_len = len;
+	args->vlen = v+1;
+	return args->vec[0].iov_len > 0;
+}
+
+int
+nfssvc_decode_createargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd_createargs *args)
+{
+	if (!(p = decode_fh(p, &args->fh))
+	 || !(p = decode_filename(p, &args->name, &args->len))
+	 || !(p = decode_sattr(p, &args->attrs)))
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfssvc_decode_renameargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd_renameargs *args)
+{
+	if (!(p = decode_fh(p, &args->ffh))
+	 || !(p = decode_filename(p, &args->fname, &args->flen))
+	 || !(p = decode_fh(p, &args->tfh))
+	 || !(p = decode_filename(p, &args->tname, &args->tlen)))
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p, struct nfsd_readlinkargs *args)
+{
+	if (!(p = decode_fh(p, &args->fh)))
+		return 0;
+	svc_take_page(rqstp);
+	args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfssvc_decode_linkargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd_linkargs *args)
+{
+	if (!(p = decode_fh(p, &args->ffh))
+	 || !(p = decode_fh(p, &args->tfh))
+	 || !(p = decode_filename(p, &args->tname, &args->tlen)))
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd_symlinkargs *args)
+{
+	if (!(p = decode_fh(p, &args->ffh))
+	 || !(p = decode_filename(p, &args->fname, &args->flen))
+	 || !(p = decode_pathname(p, &args->tname, &args->tlen))
+	 || !(p = decode_sattr(p, &args->attrs)))
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfssvc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd_readdirargs *args)
+{
+	if (!(p = decode_fh(p, &args->fh)))
+		return 0;
+	args->cookie = ntohl(*p++);
+	args->count  = ntohl(*p++);
+	if (args->count > PAGE_SIZE)
+		args->count = PAGE_SIZE;
+
+	svc_take_page(rqstp);
+	args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * XDR encode functions
+ */
+int
+nfssvc_encode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)
+{
+	return xdr_ressize_check(rqstp, p);
+}
+
+int
+nfssvc_encode_attrstat(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd_attrstat *resp)
+{
+	p = encode_fattr(rqstp, p, &resp->fh);
+	return xdr_ressize_check(rqstp, p);
+}
+
+int
+nfssvc_encode_diropres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd_diropres *resp)
+{
+	p = encode_fh(p, &resp->fh);
+	p = encode_fattr(rqstp, p, &resp->fh);
+	return xdr_ressize_check(rqstp, p);
+}
+
+int
+nfssvc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd_readlinkres *resp)
+{
+	*p++ = htonl(resp->len);
+	xdr_ressize_check(rqstp, p);
+	rqstp->rq_res.page_len = resp->len;
+	if (resp->len & 3) {
+		/* need to pad the tail */
+		rqstp->rq_restailpage = 0;
+		rqstp->rq_res.tail[0].iov_base = p;
+		*p = 0;
+		rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
+	}
+	return 1;
+}
+
+int
+nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd_readres *resp)
+{
+	p = encode_fattr(rqstp, p, &resp->fh);
+	*p++ = htonl(resp->count);
+	xdr_ressize_check(rqstp, p);
+
+	/* now update rqstp->rq_res to reflect data aswell */
+	rqstp->rq_res.page_len = resp->count;
+	if (resp->count & 3) {
+		/* need to pad the tail */
+		rqstp->rq_restailpage = 0;
+		rqstp->rq_res.tail[0].iov_base = p;
+		*p = 0;
+		rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3);
+	}
+	return 1;
+}
+
+int
+nfssvc_encode_readdirres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd_readdirres *resp)
+{
+	xdr_ressize_check(rqstp, p);
+	p = resp->buffer;
+	*p++ = 0;			/* no more entries */
+	*p++ = htonl((resp->common.err == nfserr_eof));
+	rqstp->rq_res.page_len = (((unsigned long)p-1) & ~PAGE_MASK)+1;
+
+	return 1;
+}
+
+int
+nfssvc_encode_statfsres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd_statfsres *resp)
+{
+	struct kstatfs	*stat = &resp->stats;
+
+	*p++ = htonl(NFSSVC_MAXBLKSIZE);	/* max transfer size */
+	*p++ = htonl(stat->f_bsize);
+	*p++ = htonl(stat->f_blocks);
+	*p++ = htonl(stat->f_bfree);
+	*p++ = htonl(stat->f_bavail);
+	return xdr_ressize_check(rqstp, p);
+}
+
+int
+nfssvc_encode_entry(struct readdir_cd *ccd, const char *name,
+		    int namlen, loff_t offset, ino_t ino, unsigned int d_type)
+{
+	struct nfsd_readdirres *cd = container_of(ccd, struct nfsd_readdirres, common);
+	u32	*p = cd->buffer;
+	int	buflen, slen;
+
+	/*
+	dprintk("nfsd: entry(%.*s off %ld ino %ld)\n",
+			namlen, name, offset, ino);
+	 */
+
+	if (offset > ~((u32) 0)) {
+		cd->common.err = nfserr_fbig;
+		return -EINVAL;
+	}
+	if (cd->offset)
+		*cd->offset = htonl(offset);
+	if (namlen > NFS2_MAXNAMLEN)
+		namlen = NFS2_MAXNAMLEN;/* truncate filename */
+
+	slen = XDR_QUADLEN(namlen);
+	if ((buflen = cd->buflen - slen - 4) < 0) {
+		cd->common.err = nfserr_toosmall;
+		return -EINVAL;
+	}
+	*p++ = xdr_one;				/* mark entry present */
+	*p++ = htonl((u32) ino);		/* file id */
+	p    = xdr_encode_array(p, name, namlen);/* name length & name */
+	cd->offset = p;			/* remember pointer */
+	*p++ = ~(u32) 0;		/* offset of next entry */
+
+	cd->buflen = buflen;
+	cd->buffer = p;
+	cd->common.err = nfs_ok;
+	return 0;
+}
+
+/*
+ * XDR release functions
+ */
+int
+nfssvc_release_fhandle(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd_fhandle *resp)
+{
+	fh_put(&resp->fh);
+	return 1;
+}
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
new file mode 100644
index 000000000000..1cf955bcc526
--- /dev/null
+++ b/fs/nfsd/stats.c
@@ -0,0 +1,101 @@
+/*
+ * linux/fs/nfsd/stats.c
+ *
+ * procfs-based user access to knfsd statistics
+ *
+ * /proc/net/rpc/nfsd
+ *
+ * Format:
+ *	rc <hits> <misses> <nocache>
+ *			Statistsics for the reply cache
+ *	fh <stale> <total-lookups> <anonlookups> <dir-not-in-dcache> <nondir-not-in-dcache>
+ *			statistics for filehandle lookup
+ *	io <bytes-read> <bytes-writtten>
+ *			statistics for IO throughput
+ *	th <threads> <fullcnt> <10%-20%> <20%-30%> ... <90%-100%> <100%> 
+ *			time (seconds) when nfsd thread usage above thresholds
+ *			and number of times that all threads were in use
+ *	ra cache-size  <10%  <20%  <30% ... <100% not-found
+ *			number of times that read-ahead entry was found that deep in
+ *			the cache.
+ *	plus generic RPC stats (see net/sunrpc/stats.c)
+ *
+ * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/stat.h>
+#include <linux/module.h>
+
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/stats.h>
+
+struct nfsd_stats	nfsdstats;
+struct svc_stat		nfsd_svcstats = {
+	.program	= &nfsd_program,
+};
+
+static int nfsd_proc_show(struct seq_file *seq, void *v)
+{
+	int i;
+
+	seq_printf(seq, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n",
+		      nfsdstats.rchits,
+		      nfsdstats.rcmisses,
+		      nfsdstats.rcnocache,
+		      nfsdstats.fh_stale,
+		      nfsdstats.fh_lookup,
+		      nfsdstats.fh_anon,
+		      nfsdstats.fh_nocache_dir,
+		      nfsdstats.fh_nocache_nondir,
+		      nfsdstats.io_read,
+		      nfsdstats.io_write);
+	/* thread usage: */
+	seq_printf(seq, "th %u %u", nfsdstats.th_cnt, nfsdstats.th_fullcnt);
+	for (i=0; i<10; i++) {
+		unsigned int jifs = nfsdstats.th_usage[i];
+		unsigned int sec = jifs / HZ, msec = (jifs % HZ)*1000/HZ;
+		seq_printf(seq, " %u.%03u", sec, msec);
+	}
+
+	/* newline and ra-cache */
+	seq_printf(seq, "\nra %u", nfsdstats.ra_size);
+	for (i=0; i<11; i++)
+		seq_printf(seq, " %u", nfsdstats.ra_depth[i]);
+	seq_putc(seq, '\n');
+	
+	/* show my rpc info */
+	svc_seq_show(seq, &nfsd_svcstats);
+
+	return 0;
+}
+
+static int nfsd_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, nfsd_proc_show, NULL);
+}
+
+static struct file_operations nfsd_proc_fops = {
+	.owner = THIS_MODULE,
+	.open = nfsd_proc_open,
+	.read  = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void
+nfsd_stat_init(void)
+{
+	svc_proc_register(&nfsd_svcstats, &nfsd_proc_fops);
+}
+
+void
+nfsd_stat_shutdown(void)
+{
+	svc_proc_unregister("nfsd");
+}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
new file mode 100644
index 000000000000..e3e9d217236e
--- /dev/null
+++ b/fs/nfsd/vfs.c
@@ -0,0 +1,1859 @@
+#define MSNFS	/* HACK HACK */
+/*
+ * linux/fs/nfsd/vfs.c
+ *
+ * File operations used by nfsd. Some of these have been ripped from
+ * other parts of the kernel because they weren't exported, others
+ * are partial duplicates with added or changed functionality.
+ *
+ * Note that several functions dget() the dentry upon which they want
+ * to act, most notably those that create directory entries. Response
+ * dentry's are dput()'d if necessary in the release callback.
+ * So if you notice code paths that apparently fail to dput() the
+ * dentry, don't worry--they have been taken care of.
+ *
+ * Copyright (C) 1995-1999 Olaf Kirch <okir@monad.swb.de>
+ * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
+ */
+
+#include <linux/config.h>
+#include <linux/string.h>
+#include <linux/time.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/major.h>
+#include <linux/ext2_fs.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/net.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/in.h>
+#include <linux/module.h>
+#include <linux/namei.h>
+#include <linux/vfs.h>
+#include <linux/delay.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#ifdef CONFIG_NFSD_V3
+#include <linux/nfs3.h>
+#include <linux/nfsd/xdr3.h>
+#endif /* CONFIG_NFSD_V3 */
+#include <linux/nfsd/nfsfh.h>
+#include <linux/quotaops.h>
+#include <linux/dnotify.h>
+#ifdef CONFIG_NFSD_V4
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr_acl.h>
+#include <linux/xattr.h>
+#include <linux/nfs4.h>
+#include <linux/nfs4_acl.h>
+#include <linux/nfsd_idmap.h>
+#include <linux/security.h>
+#endif /* CONFIG_NFSD_V4 */
+
+#include <asm/uaccess.h>
+
+#define NFSDDBG_FACILITY		NFSDDBG_FILEOP
+#define NFSD_PARANOIA
+
+
+/* We must ignore files (but only files) which might have mandatory
+ * locks on them because there is no way to know if the accesser has
+ * the lock.
+ */
+#define IS_ISMNDLK(i)	(S_ISREG((i)->i_mode) && MANDATORY_LOCK(i))
+
+/*
+ * This is a cache of readahead params that help us choose the proper
+ * readahead strategy. Initially, we set all readahead parameters to 0
+ * and let the VFS handle things.
+ * If you increase the number of cached files very much, you'll need to
+ * add a hash table here.
+ */
+struct raparms {
+	struct raparms		*p_next;
+	unsigned int		p_count;
+	ino_t			p_ino;
+	dev_t			p_dev;
+	int			p_set;
+	struct file_ra_state	p_ra;
+};
+
+static struct raparms *		raparml;
+static struct raparms *		raparm_cache;
+
+/* 
+ * Called from nfsd_lookup and encode_dirent. Check if we have crossed 
+ * a mount point.
+ * Returns -EAGAIN leaving *dpp and *expp unchanged, 
+ *  or nfs_ok having possibly changed *dpp and *expp
+ */
+int
+nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, 
+		        struct svc_export **expp)
+{
+	struct svc_export *exp = *expp, *exp2 = NULL;
+	struct dentry *dentry = *dpp;
+	struct vfsmount *mnt = mntget(exp->ex_mnt);
+	struct dentry *mounts = dget(dentry);
+	int err = nfs_ok;
+
+	while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts));
+
+	exp2 = exp_get_by_name(exp->ex_client, mnt, mounts, &rqstp->rq_chandle);
+	if (IS_ERR(exp2)) {
+		err = PTR_ERR(exp2);
+		dput(mounts);
+		mntput(mnt);
+		goto out;
+	}
+	if (exp2 && ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2))) {
+		/* successfully crossed mount point */
+		exp_put(exp);
+		*expp = exp2;
+		dput(dentry);
+		*dpp = mounts;
+	} else {
+		if (exp2) exp_put(exp2);
+		dput(mounts);
+	}
+	mntput(mnt);
+out:
+	return err;
+}
+
+/*
+ * Look up one component of a pathname.
+ * N.B. After this call _both_ fhp and resfh need an fh_put
+ *
+ * If the lookup would cross a mountpoint, and the mounted filesystem
+ * is exported to the client with NFSEXP_NOHIDE, then the lookup is
+ * accepted as it stands and the mounted directory is
+ * returned. Otherwise the covered directory is returned.
+ * NOTE: this mountpoint crossing is not supported properly by all
+ *   clients and is explicitly disallowed for NFSv3
+ *      NeilBrown <neilb@cse.unsw.edu.au>
+ */
+int
+nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
+					int len, struct svc_fh *resfh)
+{
+	struct svc_export	*exp;
+	struct dentry		*dparent;
+	struct dentry		*dentry;
+	int			err;
+
+	dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
+
+	/* Obtain dentry and export. */
+	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_EXEC);
+	if (err)
+		return err;
+
+	dparent = fhp->fh_dentry;
+	exp  = fhp->fh_export;
+	exp_get(exp);
+
+	err = nfserr_acces;
+
+	/* Lookup the name, but don't follow links */
+	if (isdotent(name, len)) {
+		if (len==1)
+			dentry = dget(dparent);
+		else if (dparent != exp->ex_dentry) {
+			dentry = dget_parent(dparent);
+		} else  if (!EX_NOHIDE(exp))
+			dentry = dget(dparent); /* .. == . just like at / */
+		else {
+			/* checking mountpoint crossing is very different when stepping up */
+			struct svc_export *exp2 = NULL;
+			struct dentry *dp;
+			struct vfsmount *mnt = mntget(exp->ex_mnt);
+			dentry = dget(dparent);
+			while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry))
+				;
+			dp = dget_parent(dentry);
+			dput(dentry);
+			dentry = dp;
+
+			exp2 = exp_parent(exp->ex_client, mnt, dentry,
+					  &rqstp->rq_chandle);
+			if (IS_ERR(exp2)) {
+				err = PTR_ERR(exp2);
+				dput(dentry);
+				mntput(mnt);
+				goto out_nfserr;
+			}
+			if (!exp2) {
+				dput(dentry);
+				dentry = dget(dparent);
+			} else {
+				exp_put(exp);
+				exp = exp2;
+			}
+			mntput(mnt);
+		}
+	} else {
+		fh_lock(fhp);
+		dentry = lookup_one_len(name, dparent, len);
+		err = PTR_ERR(dentry);
+		if (IS_ERR(dentry))
+			goto out_nfserr;
+		/*
+		 * check if we have crossed a mount point ...
+		 */
+		if (d_mountpoint(dentry)) {
+			if ((err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
+				dput(dentry);
+				goto out_nfserr;
+			}
+		}
+	}
+	/*
+	 * Note: we compose the file handle now, but as the
+	 * dentry may be negative, it may need to be updated.
+	 */
+	err = fh_compose(resfh, exp, dentry, fhp);
+	if (!err && !dentry->d_inode)
+		err = nfserr_noent;
+	dput(dentry);
+out:
+	exp_put(exp);
+	return err;
+
+out_nfserr:
+	err = nfserrno(err);
+	goto out;
+}
+
+/*
+ * Set various file attributes.
+ * N.B. After this call fhp needs an fh_put
+ */
+int
+nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
+	     int check_guard, time_t guardtime)
+{
+	struct dentry	*dentry;
+	struct inode	*inode;
+	int		accmode = MAY_SATTR;
+	int		ftype = 0;
+	int		imode;
+	int		err;
+	int		size_change = 0;
+
+	if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
+		accmode |= MAY_WRITE|MAY_OWNER_OVERRIDE;
+	if (iap->ia_valid & ATTR_SIZE)
+		ftype = S_IFREG;
+
+	/* Get inode */
+	err = fh_verify(rqstp, fhp, ftype, accmode);
+	if (err || !iap->ia_valid)
+		goto out;
+
+	dentry = fhp->fh_dentry;
+	inode = dentry->d_inode;
+
+	/* NFSv2 does not differentiate between "set-[ac]time-to-now"
+	 * which only requires access, and "set-[ac]time-to-X" which
+	 * requires ownership.
+	 * So if it looks like it might be "set both to the same time which
+	 * is close to now", and if inode_change_ok fails, then we
+	 * convert to "set to now" instead of "set to explicit time"
+	 *
+	 * We only call inode_change_ok as the last test as technically
+	 * it is not an interface that we should be using.  It is only
+	 * valid if the filesystem does not define it's own i_op->setattr.
+	 */
+#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
+#define	MAX_TOUCH_TIME_ERROR (30*60)
+	if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET
+	    && iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec
+	    ) {
+	    /* Looks probable.  Now just make sure time is in the right ballpark.
+	     * Solaris, at least, doesn't seem to care what the time request is.
+	     * We require it be within 30 minutes of now.
+	     */
+	    time_t delta = iap->ia_atime.tv_sec - get_seconds();
+	    if (delta<0) delta = -delta;
+	    if (delta < MAX_TOUCH_TIME_ERROR &&
+		inode_change_ok(inode, iap) != 0) {
+		/* turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME
+		 * this will cause notify_change to set these times to "now"
+		 */
+		iap->ia_valid &= ~BOTH_TIME_SET;
+	    }
+	}
+	    
+	/* The size case is special. It changes the file as well as the attributes.  */
+	if (iap->ia_valid & ATTR_SIZE) {
+		if (iap->ia_size < inode->i_size) {
+			err = nfsd_permission(fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE);
+			if (err)
+				goto out;
+		}
+
+		/*
+		 * If we are changing the size of the file, then
+		 * we need to break all leases.
+		 */
+		err = break_lease(inode, FMODE_WRITE | O_NONBLOCK);
+		if (err == -EWOULDBLOCK)
+			err = -ETIMEDOUT;
+		if (err) /* ENOMEM or EWOULDBLOCK */
+			goto out_nfserr;
+
+		err = get_write_access(inode);
+		if (err)
+			goto out_nfserr;
+
+		size_change = 1;
+		err = locks_verify_truncate(inode, NULL, iap->ia_size);
+		if (err) {
+			put_write_access(inode);
+			goto out_nfserr;
+		}
+		DQUOT_INIT(inode);
+	}
+
+	imode = inode->i_mode;
+	if (iap->ia_valid & ATTR_MODE) {
+		iap->ia_mode &= S_IALLUGO;
+		imode = iap->ia_mode |= (imode & ~S_IALLUGO);
+	}
+
+	/* Revoke setuid/setgid bit on chown/chgrp */
+	if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid)
+		iap->ia_valid |= ATTR_KILL_SUID;
+	if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)
+		iap->ia_valid |= ATTR_KILL_SGID;
+
+	/* Change the attributes. */
+
+	iap->ia_valid |= ATTR_CTIME;
+
+	err = nfserr_notsync;
+	if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
+		fh_lock(fhp);
+		err = notify_change(dentry, iap);
+		err = nfserrno(err);
+		fh_unlock(fhp);
+	}
+	if (size_change)
+		put_write_access(inode);
+	if (!err)
+		if (EX_ISSYNC(fhp->fh_export))
+			write_inode_now(inode, 1);
+out:
+	return err;
+
+out_nfserr:
+	err = nfserrno(err);
+	goto out;
+}
+
+#if defined(CONFIG_NFSD_V4)
+
+static int
+set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
+{
+	int len;
+	size_t buflen;
+	char *buf = NULL;
+	int error = 0;
+	struct inode *inode = dentry->d_inode;
+
+	buflen = posix_acl_xattr_size(pacl->a_count);
+	buf = kmalloc(buflen, GFP_KERNEL);
+	error = -ENOMEM;
+	if (buf == NULL)
+		goto out;
+
+	len = posix_acl_to_xattr(pacl, buf, buflen);
+	if (len < 0) {
+		error = len;
+		goto out;
+	}
+
+	error = -EOPNOTSUPP;
+	if (inode->i_op && inode->i_op->setxattr) {
+		down(&inode->i_sem);
+		security_inode_setxattr(dentry, key, buf, len, 0);
+		error = inode->i_op->setxattr(dentry, key, buf, len, 0);
+		if (!error)
+			security_inode_post_setxattr(dentry, key, buf, len, 0);
+		up(&inode->i_sem);
+	}
+out:
+	kfree(buf);
+	return error;
+}
+
+int
+nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
+    struct nfs4_acl *acl)
+{
+	int error;
+	struct dentry *dentry;
+	struct inode *inode;
+	struct posix_acl *pacl = NULL, *dpacl = NULL;
+	unsigned int flags = 0;
+
+	/* Get inode */
+	error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR);
+	if (error)
+		goto out;
+
+	dentry = fhp->fh_dentry;
+	inode = dentry->d_inode;
+	if (S_ISDIR(inode->i_mode))
+		flags = NFS4_ACL_DIR;
+
+	error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags);
+	if (error == -EINVAL) {
+		error = nfserr_attrnotsupp;
+		goto out;
+	} else if (error < 0)
+		goto out_nfserr;
+
+	if (pacl) {
+		error = set_nfsv4_acl_one(dentry, pacl, XATTR_NAME_ACL_ACCESS);
+		if (error < 0)
+			goto out_nfserr;
+	}
+
+	if (dpacl) {
+		error = set_nfsv4_acl_one(dentry, dpacl, XATTR_NAME_ACL_DEFAULT);
+		if (error < 0)
+			goto out_nfserr;
+	}
+
+	error = nfs_ok;
+
+out:
+	posix_acl_release(pacl);
+	posix_acl_release(dpacl);
+	return (error);
+out_nfserr:
+	error = nfserrno(error);
+	goto out;
+}
+
+static struct posix_acl *
+_get_posix_acl(struct dentry *dentry, char *key)
+{
+	struct inode *inode = dentry->d_inode;
+	char *buf = NULL;
+	int buflen, error = 0;
+	struct posix_acl *pacl = NULL;
+
+	error = -EOPNOTSUPP;
+	if (inode->i_op == NULL)
+		goto out_err;
+	if (inode->i_op->getxattr == NULL)
+		goto out_err;
+
+	error = security_inode_getxattr(dentry, key);
+	if (error)
+		goto out_err;
+
+	buflen = inode->i_op->getxattr(dentry, key, NULL, 0);
+	if (buflen <= 0) {
+		error = buflen < 0 ? buflen : -ENODATA;
+		goto out_err;
+	}
+
+	buf = kmalloc(buflen, GFP_KERNEL);
+	if (buf == NULL) {
+		error = -ENOMEM;
+		goto out_err;
+	}
+
+	error = inode->i_op->getxattr(dentry, key, buf, buflen);
+	if (error < 0)
+		goto out_err;
+
+	pacl = posix_acl_from_xattr(buf, buflen);
+ out:
+	kfree(buf);
+	return pacl;
+ out_err:
+	pacl = ERR_PTR(error);
+	goto out;
+}
+
+int
+nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl)
+{
+	struct inode *inode = dentry->d_inode;
+	int error = 0;
+	struct posix_acl *pacl = NULL, *dpacl = NULL;
+	unsigned int flags = 0;
+
+	pacl = _get_posix_acl(dentry, XATTR_NAME_ACL_ACCESS);
+	if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA)
+		pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
+	if (IS_ERR(pacl)) {
+		error = PTR_ERR(pacl);
+		pacl = NULL;
+		goto out;
+	}
+
+	if (S_ISDIR(inode->i_mode)) {
+		dpacl = _get_posix_acl(dentry, XATTR_NAME_ACL_DEFAULT);
+		if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA)
+			dpacl = NULL;
+		else if (IS_ERR(dpacl)) {
+			error = PTR_ERR(dpacl);
+			dpacl = NULL;
+			goto out;
+		}
+		flags = NFS4_ACL_DIR;
+	}
+
+	*acl = nfs4_acl_posix_to_nfsv4(pacl, dpacl, flags);
+	if (IS_ERR(*acl)) {
+		error = PTR_ERR(*acl);
+		*acl = NULL;
+	}
+ out:
+	posix_acl_release(pacl);
+	posix_acl_release(dpacl);
+	return error;
+}
+
+#endif /* defined(CONFIG_NFS_V4) */
+
+#ifdef CONFIG_NFSD_V3
+/*
+ * Check server access rights to a file system object
+ */
+struct accessmap {
+	u32		access;
+	int		how;
+};
+static struct accessmap	nfs3_regaccess[] = {
+    {	NFS3_ACCESS_READ,	MAY_READ			},
+    {	NFS3_ACCESS_EXECUTE,	MAY_EXEC			},
+    {	NFS3_ACCESS_MODIFY,	MAY_WRITE|MAY_TRUNC		},
+    {	NFS3_ACCESS_EXTEND,	MAY_WRITE			},
+
+    {	0,			0				}
+};
+
+static struct accessmap	nfs3_diraccess[] = {
+    {	NFS3_ACCESS_READ,	MAY_READ			},
+    {	NFS3_ACCESS_LOOKUP,	MAY_EXEC			},
+    {	NFS3_ACCESS_MODIFY,	MAY_EXEC|MAY_WRITE|MAY_TRUNC	},
+    {	NFS3_ACCESS_EXTEND,	MAY_EXEC|MAY_WRITE		},
+    {	NFS3_ACCESS_DELETE,	MAY_REMOVE			},
+
+    {	0,			0				}
+};
+
+static struct accessmap	nfs3_anyaccess[] = {
+	/* Some clients - Solaris 2.6 at least, make an access call
+	 * to the server to check for access for things like /dev/null
+	 * (which really, the server doesn't care about).  So
+	 * We provide simple access checking for them, looking
+	 * mainly at mode bits, and we make sure to ignore read-only
+	 * filesystem checks
+	 */
+    {	NFS3_ACCESS_READ,	MAY_READ			},
+    {	NFS3_ACCESS_EXECUTE,	MAY_EXEC			},
+    {	NFS3_ACCESS_MODIFY,	MAY_WRITE|MAY_LOCAL_ACCESS	},
+    {	NFS3_ACCESS_EXTEND,	MAY_WRITE|MAY_LOCAL_ACCESS	},
+
+    {	0,			0				}
+};
+
+int
+nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *supported)
+{
+	struct accessmap	*map;
+	struct svc_export	*export;
+	struct dentry		*dentry;
+	u32			query, result = 0, sresult = 0;
+	unsigned int		error;
+
+	error = fh_verify(rqstp, fhp, 0, MAY_NOP);
+	if (error)
+		goto out;
+
+	export = fhp->fh_export;
+	dentry = fhp->fh_dentry;
+
+	if (S_ISREG(dentry->d_inode->i_mode))
+		map = nfs3_regaccess;
+	else if (S_ISDIR(dentry->d_inode->i_mode))
+		map = nfs3_diraccess;
+	else
+		map = nfs3_anyaccess;
+
+
+	query = *access;
+	for  (; map->access; map++) {
+		if (map->access & query) {
+			unsigned int err2;
+
+			sresult |= map->access;
+
+			err2 = nfsd_permission(export, dentry, map->how);
+			switch (err2) {
+			case nfs_ok:
+				result |= map->access;
+				break;
+				
+			/* the following error codes just mean the access was not allowed,
+			 * rather than an error occurred */
+			case nfserr_rofs:
+			case nfserr_acces:
+			case nfserr_perm:
+				/* simply don't "or" in the access bit. */
+				break;
+			default:
+				error = err2;
+				goto out;
+			}
+		}
+	}
+	*access = result;
+	if (supported)
+		*supported = sresult;
+
+ out:
+	return error;
+}
+#endif /* CONFIG_NFSD_V3 */
+
+
+
+/*
+ * Open an existing file or directory.
+ * The access argument indicates the type of open (read/write/lock)
+ * N.B. After this call fhp needs an fh_put
+ */
+int
+nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
+			int access, struct file **filp)
+{
+	struct dentry	*dentry;
+	struct inode	*inode;
+	int		flags = O_RDONLY|O_LARGEFILE, err;
+
+	/*
+	 * If we get here, then the client has already done an "open",
+	 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
+	 * in case a chmod has now revoked permission.
+	 */
+	err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE);
+	if (err)
+		goto out;
+
+	dentry = fhp->fh_dentry;
+	inode = dentry->d_inode;
+
+	/* Disallow write access to files with the append-only bit set
+	 * or any access when mandatory locking enabled
+	 */
+	err = nfserr_perm;
+	if (IS_APPEND(inode) && (access & MAY_WRITE))
+		goto out;
+	if (IS_ISMNDLK(inode))
+		goto out;
+
+	if (!inode->i_fop)
+		goto out;
+
+	/*
+	 * Check to see if there are any leases on this file.
+	 * This may block while leases are broken.
+	 */
+	err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0));
+	if (err == -EWOULDBLOCK)
+		err = -ETIMEDOUT;
+	if (err) /* NOMEM or WOULDBLOCK */
+		goto out_nfserr;
+
+	if (access & MAY_WRITE) {
+		flags = O_WRONLY|O_LARGEFILE;
+
+		DQUOT_INIT(inode);
+	}
+	*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_mnt), flags);
+	if (IS_ERR(*filp))
+		err = PTR_ERR(*filp);
+out_nfserr:
+	if (err)
+		err = nfserrno(err);
+out:
+	return err;
+}
+
+/*
+ * Close a file.
+ */
+void
+nfsd_close(struct file *filp)
+{
+	fput(filp);
+}
+
+/*
+ * Sync a file
+ * As this calls fsync (not fdatasync) there is no need for a write_inode
+ * after it.
+ */
+static inline void nfsd_dosync(struct file *filp, struct dentry *dp,
+			       struct file_operations *fop)
+{
+	struct inode *inode = dp->d_inode;
+	int (*fsync) (struct file *, struct dentry *, int);
+
+	filemap_fdatawrite(inode->i_mapping);
+	if (fop && (fsync = fop->fsync))
+		fsync(filp, dp, 0);
+	filemap_fdatawait(inode->i_mapping);
+}
+	
+
+static void
+nfsd_sync(struct file *filp)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name);
+	down(&inode->i_sem);
+	nfsd_dosync(filp, filp->f_dentry, filp->f_op);
+	up(&inode->i_sem);
+}
+
+static void
+nfsd_sync_dir(struct dentry *dp)
+{
+	nfsd_dosync(NULL, dp, dp->d_inode->i_fop);
+}
+
+/*
+ * Obtain the readahead parameters for the file
+ * specified by (dev, ino).
+ */
+static DEFINE_SPINLOCK(ra_lock);
+
+static inline struct raparms *
+nfsd_get_raparms(dev_t dev, ino_t ino)
+{
+	struct raparms	*ra, **rap, **frap = NULL;
+	int depth = 0;
+
+	spin_lock(&ra_lock);
+	for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) {
+		if (ra->p_ino == ino && ra->p_dev == dev)
+			goto found;
+		depth++;
+		if (ra->p_count == 0)
+			frap = rap;
+	}
+	depth = nfsdstats.ra_size*11/10;
+	if (!frap) {	
+		spin_unlock(&ra_lock);
+		return NULL;
+	}
+	rap = frap;
+	ra = *frap;
+	ra->p_dev = dev;
+	ra->p_ino = ino;
+	ra->p_set = 0;
+found:
+	if (rap != &raparm_cache) {
+		*rap = ra->p_next;
+		ra->p_next   = raparm_cache;
+		raparm_cache = ra;
+	}
+	ra->p_count++;
+	nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
+	spin_unlock(&ra_lock);
+	return ra;
+}
+
+/*
+ * Grab and keep cached pages assosiated with a file in the svc_rqst
+ * so that they can be passed to the netowork sendmsg/sendpage routines
+ * directrly. They will be released after the sending has completed.
+ */
+static int
+nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset , unsigned long size)
+{
+	unsigned long count = desc->count;
+	struct svc_rqst *rqstp = desc->arg.data;
+
+	if (size > count)
+		size = count;
+
+	if (rqstp->rq_res.page_len == 0) {
+		get_page(page);
+		rqstp->rq_respages[rqstp->rq_resused++] = page;
+		rqstp->rq_res.page_base = offset;
+		rqstp->rq_res.page_len = size;
+	} else if (page != rqstp->rq_respages[rqstp->rq_resused-1]) {
+		get_page(page);
+		rqstp->rq_respages[rqstp->rq_resused++] = page;
+		rqstp->rq_res.page_len += size;
+	} else {
+		rqstp->rq_res.page_len += size;
+	}
+
+	desc->count = count - size;
+	desc->written += size;
+	return size;
+}
+
+static inline int
+nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
+              loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
+{
+	struct inode *inode;
+	struct raparms	*ra;
+	mm_segment_t	oldfs;
+	int		err;
+
+	err = nfserr_perm;
+	inode = file->f_dentry->d_inode;
+#ifdef MSNFS
+	if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
+		(!lock_may_read(inode, offset, *count)))
+		goto out;
+#endif
+
+	/* Get readahead parameters */
+	ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
+
+	if (ra && ra->p_set)
+		file->f_ra = ra->p_ra;
+
+	if (file->f_op->sendfile) {
+		svc_pushback_unused_pages(rqstp);
+		err = file->f_op->sendfile(file, &offset, *count,
+						 nfsd_read_actor, rqstp);
+	} else {
+		oldfs = get_fs();
+		set_fs(KERNEL_DS);
+		err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset);
+		set_fs(oldfs);
+	}
+
+	/* Write back readahead params */
+	if (ra) {
+		spin_lock(&ra_lock);
+		ra->p_ra = file->f_ra;
+		ra->p_set = 1;
+		ra->p_count--;
+		spin_unlock(&ra_lock);
+	}
+
+	if (err >= 0) {
+		nfsdstats.io_read += err;
+		*count = err;
+		err = 0;
+		dnotify_parent(file->f_dentry, DN_ACCESS);
+	} else 
+		err = nfserrno(err);
+out:
+	return err;
+}
+
+static inline int
+nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
+				loff_t offset, struct kvec *vec, int vlen,
+	   			unsigned long cnt, int *stablep)
+{
+	struct svc_export	*exp;
+	struct dentry		*dentry;
+	struct inode		*inode;
+	mm_segment_t		oldfs;
+	int			err = 0;
+	int			stable = *stablep;
+
+	err = nfserr_perm;
+
+#ifdef MSNFS
+	if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
+		(!lock_may_write(file->f_dentry->d_inode, offset, cnt)))
+		goto out;
+#endif
+
+	dentry = file->f_dentry;
+	inode = dentry->d_inode;
+	exp   = fhp->fh_export;
+
+	/*
+	 * Request sync writes if
+	 *  -	the sync export option has been set, or
+	 *  -	the client requested O_SYNC behavior (NFSv3 feature).
+	 *  -   The file system doesn't support fsync().
+	 * When gathered writes have been configured for this volume,
+	 * flushing the data to disk is handled separately below.
+	 */
+
+	if (file->f_op->fsync == 0) {/* COMMIT3 cannot work */
+	       stable = 2;
+	       *stablep = 2; /* FILE_SYNC */
+	}
+
+	if (!EX_ISSYNC(exp))
+		stable = 0;
+	if (stable && !EX_WGATHER(exp))
+		file->f_flags |= O_SYNC;
+
+	/* Write the data. */
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
+	set_fs(oldfs);
+	if (err >= 0) {
+		nfsdstats.io_write += cnt;
+		dnotify_parent(file->f_dentry, DN_MODIFY);
+	}
+
+	/* clear setuid/setgid flag after write */
+	if (err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) {
+		struct iattr	ia;
+		ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID;
+
+		down(&inode->i_sem);
+		notify_change(dentry, &ia);
+		up(&inode->i_sem);
+	}
+
+	if (err >= 0 && stable) {
+		static ino_t	last_ino;
+		static dev_t	last_dev;
+
+		/*
+		 * Gathered writes: If another process is currently
+		 * writing to the file, there's a high chance
+		 * this is another nfsd (triggered by a bulk write
+		 * from a client's biod). Rather than syncing the
+		 * file with each write request, we sleep for 10 msec.
+		 *
+		 * I don't know if this roughly approximates
+		 * C. Juszak's idea of gathered writes, but it's a
+		 * nice and simple solution (IMHO), and it seems to
+		 * work:-)
+		 */
+		if (EX_WGATHER(exp)) {
+			if (atomic_read(&inode->i_writecount) > 1
+			    || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
+				dprintk("nfsd: write defer %d\n", current->pid);
+				msleep(10);
+				dprintk("nfsd: write resume %d\n", current->pid);
+			}
+
+			if (inode->i_state & I_DIRTY) {
+				dprintk("nfsd: write sync %d\n", current->pid);
+				nfsd_sync(file);
+			}
+#if 0
+			wake_up(&inode->i_wait);
+#endif
+		}
+		last_ino = inode->i_ino;
+		last_dev = inode->i_sb->s_dev;
+	}
+
+	dprintk("nfsd: write complete err=%d\n", err);
+	if (err >= 0)
+		err = 0;
+	else 
+		err = nfserrno(err);
+out:
+	return err;
+}
+
+/*
+ * Read data from a file. count must contain the requested read count
+ * on entry. On return, *count contains the number of bytes actually read.
+ * N.B. After this call fhp needs an fh_put
+ */
+int
+nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
+		loff_t offset, struct kvec *vec, int vlen,
+		unsigned long *count)
+{
+	int		err;
+
+	if (file) {
+		err = nfsd_permission(fhp->fh_export, fhp->fh_dentry,
+				MAY_READ|MAY_OWNER_OVERRIDE);
+		if (err)
+			goto out;
+		err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
+	} else {
+		err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file);
+		if (err)
+			goto out;
+		err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
+		nfsd_close(file);
+	}
+out:
+	return err;
+}
+
+/*
+ * Write data to a file.
+ * The stable flag requests synchronous writes.
+ * N.B. After this call fhp needs an fh_put
+ */
+int
+nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
+		loff_t offset, struct kvec *vec, int vlen, unsigned long cnt,
+		int *stablep)
+{
+	int			err = 0;
+
+	if (file) {
+		err = nfsd_permission(fhp->fh_export, fhp->fh_dentry,
+				MAY_WRITE|MAY_OWNER_OVERRIDE);
+		if (err)
+			goto out;
+		err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
+				stablep);
+	} else {
+		err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file);
+		if (err)
+			goto out;
+
+		if (cnt)
+			err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
+					     cnt, stablep);
+		nfsd_close(file);
+	}
+out:
+	return err;
+}
+
+#ifdef CONFIG_NFSD_V3
+/*
+ * Commit all pending writes to stable storage.
+ * Strictly speaking, we could sync just the indicated file region here,
+ * but there's currently no way we can ask the VFS to do so.
+ *
+ * Unfortunately we cannot lock the file to make sure we return full WCC
+ * data to the client, as locking happens lower down in the filesystem.
+ */
+int
+nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
+               loff_t offset, unsigned long count)
+{
+	struct file	*file;
+	int		err;
+
+	if ((u64)count > ~(u64)offset)
+		return nfserr_inval;
+
+	if ((err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file)) != 0)
+		return err;
+	if (EX_ISSYNC(fhp->fh_export)) {
+		if (file->f_op && file->f_op->fsync) {
+			nfsd_sync(file);
+		} else {
+			err = nfserr_notsupp;
+		}
+	}
+
+	nfsd_close(file);
+	return err;
+}
+#endif /* CONFIG_NFSD_V3 */
+
+/*
+ * Create a file (regular, directory, device, fifo); UNIX sockets 
+ * not yet implemented.
+ * If the response fh has been verified, the parent directory should
+ * already be locked. Note that the parent directory is left locked.
+ *
+ * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
+ */
+int
+nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		char *fname, int flen, struct iattr *iap,
+		int type, dev_t rdev, struct svc_fh *resfhp)
+{
+	struct dentry	*dentry, *dchild = NULL;
+	struct inode	*dirp;
+	int		err;
+
+	err = nfserr_perm;
+	if (!flen)
+		goto out;
+	err = nfserr_exist;
+	if (isdotent(fname, flen))
+		goto out;
+
+	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
+	if (err)
+		goto out;
+
+	dentry = fhp->fh_dentry;
+	dirp = dentry->d_inode;
+
+	err = nfserr_notdir;
+	if(!dirp->i_op || !dirp->i_op->lookup)
+		goto out;
+	/*
+	 * Check whether the response file handle has been verified yet.
+	 * If it has, the parent directory should already be locked.
+	 */
+	if (!resfhp->fh_dentry) {
+		/* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
+		fh_lock(fhp);
+		dchild = lookup_one_len(fname, dentry, flen);
+		err = PTR_ERR(dchild);
+		if (IS_ERR(dchild))
+			goto out_nfserr;
+		err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
+		if (err)
+			goto out;
+	} else {
+		/* called from nfsd_proc_create */
+		dchild = dget(resfhp->fh_dentry);
+		if (!fhp->fh_locked) {
+			/* not actually possible */
+			printk(KERN_ERR
+				"nfsd_create: parent %s/%s not locked!\n",
+				dentry->d_parent->d_name.name,
+				dentry->d_name.name);
+			err = -EIO;
+			goto out;
+		}
+	}
+	/*
+	 * Make sure the child dentry is still negative ...
+	 */
+	err = nfserr_exist;
+	if (dchild->d_inode) {
+		dprintk("nfsd_create: dentry %s/%s not negative!\n",
+			dentry->d_name.name, dchild->d_name.name);
+		goto out; 
+	}
+
+	if (!(iap->ia_valid & ATTR_MODE))
+		iap->ia_mode = 0;
+	iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;
+
+	/*
+	 * Get the dir op function pointer.
+	 */
+	err = nfserr_perm;
+	switch (type) {
+	case S_IFREG:
+		err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
+		break;
+	case S_IFDIR:
+		err = vfs_mkdir(dirp, dchild, iap->ia_mode);
+		break;
+	case S_IFCHR:
+	case S_IFBLK:
+	case S_IFIFO:
+	case S_IFSOCK:
+		err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
+		break;
+	default:
+	        printk("nfsd: bad file type %o in nfsd_create\n", type);
+		err = -EINVAL;
+	}
+	if (err < 0)
+		goto out_nfserr;
+
+	if (EX_ISSYNC(fhp->fh_export)) {
+		nfsd_sync_dir(dentry);
+		write_inode_now(dchild->d_inode, 1);
+	}
+
+
+	/* Set file attributes. Mode has already been set and
+	 * setting uid/gid works only for root. Irix appears to
+	 * send along the gid when it tries to implement setgid
+	 * directories via NFS.
+	 */
+	err = 0;
+	if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0)
+		err = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+	/*
+	 * Update the file handle to get the new inode info.
+	 */
+	if (!err)
+		err = fh_update(resfhp);
+out:
+	if (dchild && !IS_ERR(dchild))
+		dput(dchild);
+	return err;
+
+out_nfserr:
+	err = nfserrno(err);
+	goto out;
+}
+
+#ifdef CONFIG_NFSD_V3
+/*
+ * NFSv3 version of nfsd_create
+ */
+int
+nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		char *fname, int flen, struct iattr *iap,
+		struct svc_fh *resfhp, int createmode, u32 *verifier,
+	        int *truncp)
+{
+	struct dentry	*dentry, *dchild = NULL;
+	struct inode	*dirp;
+	int		err;
+	__u32		v_mtime=0, v_atime=0;
+	int		v_mode=0;
+
+	err = nfserr_perm;
+	if (!flen)
+		goto out;
+	err = nfserr_exist;
+	if (isdotent(fname, flen))
+		goto out;
+	if (!(iap->ia_valid & ATTR_MODE))
+		iap->ia_mode = 0;
+	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
+	if (err)
+		goto out;
+
+	dentry = fhp->fh_dentry;
+	dirp = dentry->d_inode;
+
+	/* Get all the sanity checks out of the way before
+	 * we lock the parent. */
+	err = nfserr_notdir;
+	if(!dirp->i_op || !dirp->i_op->lookup)
+		goto out;
+	fh_lock(fhp);
+
+	/*
+	 * Compose the response file handle.
+	 */
+	dchild = lookup_one_len(fname, dentry, flen);
+	err = PTR_ERR(dchild);
+	if (IS_ERR(dchild))
+		goto out_nfserr;
+
+	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
+	if (err)
+		goto out;
+
+	if (createmode == NFS3_CREATE_EXCLUSIVE) {
+		/* while the verifier would fit in mtime+atime,
+		 * solaris7 gets confused (bugid 4218508) if these have
+		 * the high bit set, so we use the mode as well
+		 */
+		v_mtime = verifier[0]&0x7fffffff;
+		v_atime = verifier[1]&0x7fffffff;
+		v_mode  = S_IFREG
+			| ((verifier[0]&0x80000000) >> (32-7)) /* u+x */
+			| ((verifier[1]&0x80000000) >> (32-9)) /* u+r */
+			;
+	}
+	
+	if (dchild->d_inode) {
+		err = 0;
+
+		switch (createmode) {
+		case NFS3_CREATE_UNCHECKED:
+			if (! S_ISREG(dchild->d_inode->i_mode))
+				err = nfserr_exist;
+			else if (truncp) {
+				/* in nfsv4, we need to treat this case a little
+				 * differently.  we don't want to truncate the
+				 * file now; this would be wrong if the OPEN
+				 * fails for some other reason.  furthermore,
+				 * if the size is nonzero, we should ignore it
+				 * according to spec!
+				 */
+				*truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size;
+			}
+			else {
+				iap->ia_valid &= ATTR_SIZE;
+				goto set_attr;
+			}
+			break;
+		case NFS3_CREATE_EXCLUSIVE:
+			if (   dchild->d_inode->i_mtime.tv_sec == v_mtime
+			    && dchild->d_inode->i_atime.tv_sec == v_atime
+			    && dchild->d_inode->i_mode  == v_mode
+			    && dchild->d_inode->i_size  == 0 )
+				break;
+			 /* fallthru */
+		case NFS3_CREATE_GUARDED:
+			err = nfserr_exist;
+		}
+		goto out;
+	}
+
+	err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
+	if (err < 0)
+		goto out_nfserr;
+
+	if (EX_ISSYNC(fhp->fh_export)) {
+		nfsd_sync_dir(dentry);
+		/* setattr will sync the child (or not) */
+	}
+
+	/*
+	 * Update the filehandle to get the new inode info.
+	 */
+	err = fh_update(resfhp);
+	if (err)
+		goto out;
+
+	if (createmode == NFS3_CREATE_EXCLUSIVE) {
+		/* Cram the verifier into atime/mtime/mode */
+		iap->ia_valid = ATTR_MTIME|ATTR_ATIME
+			| ATTR_MTIME_SET|ATTR_ATIME_SET
+			| ATTR_MODE;
+		/* XXX someone who knows this better please fix it for nsec */ 
+		iap->ia_mtime.tv_sec = v_mtime;
+		iap->ia_atime.tv_sec = v_atime;
+		iap->ia_mtime.tv_nsec = 0;
+		iap->ia_atime.tv_nsec = 0;
+		iap->ia_mode  = v_mode;
+	}
+
+	/* Set file attributes.
+	 * Mode has already been set but we might need to reset it
+	 * for CREATE_EXCLUSIVE
+	 * Irix appears to send along the gid when it tries to
+	 * implement setgid directories via NFS. Clear out all that cruft.
+	 */
+ set_attr:
+	if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID)) != 0)
+ 		err = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+
+ out:
+	fh_unlock(fhp);
+	if (dchild && !IS_ERR(dchild))
+		dput(dchild);
+ 	return err;
+ 
+ out_nfserr:
+	err = nfserrno(err);
+	goto out;
+}
+#endif /* CONFIG_NFSD_V3 */
+
+/*
+ * Read a symlink. On entry, *lenp must contain the maximum path length that
+ * fits into the buffer. On return, it contains the true length.
+ * N.B. After this call fhp needs an fh_put
+ */
+int
+nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
+{
+	struct dentry	*dentry;
+	struct inode	*inode;
+	mm_segment_t	oldfs;
+	int		err;
+
+	err = fh_verify(rqstp, fhp, S_IFLNK, MAY_NOP);
+	if (err)
+		goto out;
+
+	dentry = fhp->fh_dentry;
+	inode = dentry->d_inode;
+
+	err = nfserr_inval;
+	if (!inode->i_op || !inode->i_op->readlink)
+		goto out;
+
+	touch_atime(fhp->fh_export->ex_mnt, dentry);
+	/* N.B. Why does this call need a get_fs()??
+	 * Remove the set_fs and watch the fireworks:-) --okir
+	 */
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	err = inode->i_op->readlink(dentry, buf, *lenp);
+	set_fs(oldfs);
+
+	if (err < 0)
+		goto out_nfserr;
+	*lenp = err;
+	err = 0;
+out:
+	return err;
+
+out_nfserr:
+	err = nfserrno(err);
+	goto out;
+}
+
+/*
+ * Create a symlink and look up its inode
+ * N.B. After this call _both_ fhp and resfhp need an fh_put
+ */
+int
+nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
+				char *fname, int flen,
+				char *path,  int plen,
+				struct svc_fh *resfhp,
+				struct iattr *iap)
+{
+	struct dentry	*dentry, *dnew;
+	int		err, cerr;
+	umode_t		mode;
+
+	err = nfserr_noent;
+	if (!flen || !plen)
+		goto out;
+	err = nfserr_exist;
+	if (isdotent(fname, flen))
+		goto out;
+
+	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
+	if (err)
+		goto out;
+	fh_lock(fhp);
+	dentry = fhp->fh_dentry;
+	dnew = lookup_one_len(fname, dentry, flen);
+	err = PTR_ERR(dnew);
+	if (IS_ERR(dnew))
+		goto out_nfserr;
+
+	mode = S_IALLUGO;
+	/* Only the MODE ATTRibute is even vaguely meaningful */
+	if (iap && (iap->ia_valid & ATTR_MODE))
+		mode = iap->ia_mode & S_IALLUGO;
+
+	if (unlikely(path[plen] != 0)) {
+		char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
+		if (path_alloced == NULL)
+			err = -ENOMEM;
+		else {
+			strncpy(path_alloced, path, plen);
+			path_alloced[plen] = 0;
+			err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode);
+			kfree(path_alloced);
+		}
+	} else
+		err = vfs_symlink(dentry->d_inode, dnew, path, mode);
+
+	if (!err) {
+		if (EX_ISSYNC(fhp->fh_export))
+			nfsd_sync_dir(dentry);
+	} else
+		err = nfserrno(err);
+	fh_unlock(fhp);
+
+	cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
+	dput(dnew);
+	if (err==0) err = cerr;
+out:
+	return err;
+
+out_nfserr:
+	err = nfserrno(err);
+	goto out;
+}
+
+/*
+ * Create a hardlink
+ * N.B. After this call _both_ ffhp and tfhp need an fh_put
+ */
+int
+nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
+				char *name, int len, struct svc_fh *tfhp)
+{
+	struct dentry	*ddir, *dnew, *dold;
+	struct inode	*dirp, *dest;
+	int		err;
+
+	err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE);
+	if (err)
+		goto out;
+	err = fh_verify(rqstp, tfhp, -S_IFDIR, MAY_NOP);
+	if (err)
+		goto out;
+
+	err = nfserr_perm;
+	if (!len)
+		goto out;
+	err = nfserr_exist;
+	if (isdotent(name, len))
+		goto out;
+
+	fh_lock(ffhp);
+	ddir = ffhp->fh_dentry;
+	dirp = ddir->d_inode;
+
+	dnew = lookup_one_len(name, ddir, len);
+	err = PTR_ERR(dnew);
+	if (IS_ERR(dnew))
+		goto out_nfserr;
+
+	dold = tfhp->fh_dentry;
+	dest = dold->d_inode;
+
+	err = vfs_link(dold, dirp, dnew);
+	if (!err) {
+		if (EX_ISSYNC(ffhp->fh_export)) {
+			nfsd_sync_dir(ddir);
+			write_inode_now(dest, 1);
+		}
+	} else {
+		if (err == -EXDEV && rqstp->rq_vers == 2)
+			err = nfserr_acces;
+		else
+			err = nfserrno(err);
+	}
+
+	fh_unlock(ffhp);
+	dput(dnew);
+out:
+	return err;
+
+out_nfserr:
+	err = nfserrno(err);
+	goto out;
+}
+
+/*
+ * Rename a file
+ * N.B. After this call _both_ ffhp and tfhp need an fh_put
+ */
+int
+nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
+			    struct svc_fh *tfhp, char *tname, int tlen)
+{
+	struct dentry	*fdentry, *tdentry, *odentry, *ndentry, *trap;
+	struct inode	*fdir, *tdir;
+	int		err;
+
+	err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_REMOVE);
+	if (err)
+		goto out;
+	err = fh_verify(rqstp, tfhp, S_IFDIR, MAY_CREATE);
+	if (err)
+		goto out;
+
+	fdentry = ffhp->fh_dentry;
+	fdir = fdentry->d_inode;
+
+	tdentry = tfhp->fh_dentry;
+	tdir = tdentry->d_inode;
+
+	err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
+	if (fdir->i_sb != tdir->i_sb)
+		goto out;
+
+	err = nfserr_perm;
+	if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
+		goto out;
+
+	/* cannot use fh_lock as we need deadlock protective ordering
+	 * so do it by hand */
+	trap = lock_rename(tdentry, fdentry);
+	ffhp->fh_locked = tfhp->fh_locked = 1;
+	fill_pre_wcc(ffhp);
+	fill_pre_wcc(tfhp);
+
+	odentry = lookup_one_len(fname, fdentry, flen);
+	err = PTR_ERR(odentry);
+	if (IS_ERR(odentry))
+		goto out_nfserr;
+
+	err = -ENOENT;
+	if (!odentry->d_inode)
+		goto out_dput_old;
+	err = -EINVAL;
+	if (odentry == trap)
+		goto out_dput_old;
+
+	ndentry = lookup_one_len(tname, tdentry, tlen);
+	err = PTR_ERR(ndentry);
+	if (IS_ERR(ndentry))
+		goto out_dput_old;
+	err = -ENOTEMPTY;
+	if (ndentry == trap)
+		goto out_dput_new;
+
+#ifdef MSNFS
+	if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
+		((atomic_read(&odentry->d_count) > 1)
+		 || (atomic_read(&ndentry->d_count) > 1))) {
+			err = nfserr_perm;
+	} else
+#endif
+	err = vfs_rename(fdir, odentry, tdir, ndentry);
+	if (!err && EX_ISSYNC(tfhp->fh_export)) {
+		nfsd_sync_dir(tdentry);
+		nfsd_sync_dir(fdentry);
+	}
+
+ out_dput_new:
+	dput(ndentry);
+ out_dput_old:
+	dput(odentry);
+ out_nfserr:
+	if (err)
+		err = nfserrno(err);
+
+	/* we cannot reply on fh_unlock on the two filehandles,
+	 * as that would do the wrong thing if the two directories
+	 * were the same, so again we do it by hand
+	 */
+	fill_post_wcc(ffhp);
+	fill_post_wcc(tfhp);
+	unlock_rename(tdentry, fdentry);
+	ffhp->fh_locked = tfhp->fh_locked = 0;
+
+out:
+	return err;
+}
+
+/*
+ * Unlink a file or directory
+ * N.B. After this call fhp needs an fh_put
+ */
+int
+nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
+				char *fname, int flen)
+{
+	struct dentry	*dentry, *rdentry;
+	struct inode	*dirp;
+	int		err;
+
+	err = nfserr_acces;
+	if (!flen || isdotent(fname, flen))
+		goto out;
+	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_REMOVE);
+	if (err)
+		goto out;
+
+	fh_lock(fhp);
+	dentry = fhp->fh_dentry;
+	dirp = dentry->d_inode;
+
+	rdentry = lookup_one_len(fname, dentry, flen);
+	err = PTR_ERR(rdentry);
+	if (IS_ERR(rdentry))
+		goto out_nfserr;
+
+	if (!rdentry->d_inode) {
+		dput(rdentry);
+		err = nfserr_noent;
+		goto out;
+	}
+
+	if (!type)
+		type = rdentry->d_inode->i_mode & S_IFMT;
+
+	if (type != S_IFDIR) { /* It's UNLINK */
+#ifdef MSNFS
+		if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
+			(atomic_read(&rdentry->d_count) > 1)) {
+			err = nfserr_perm;
+		} else
+#endif
+		err = vfs_unlink(dirp, rdentry);
+	} else { /* It's RMDIR */
+		err = vfs_rmdir(dirp, rdentry);
+	}
+
+	dput(rdentry);
+
+	if (err)
+		goto out_nfserr;
+	if (EX_ISSYNC(fhp->fh_export)) 
+		nfsd_sync_dir(dentry);
+
+out:
+	return err;
+
+out_nfserr:
+	err = nfserrno(err);
+	goto out;
+}
+
+/*
+ * Read entries from a directory.
+ * The  NFSv3/4 verifier we ignore for now.
+ */
+int
+nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, 
+	     struct readdir_cd *cdp, encode_dent_fn func)
+{
+	int		err;
+	struct file	*file;
+	loff_t		offset = *offsetp;
+
+	err = nfsd_open(rqstp, fhp, S_IFDIR, MAY_READ, &file);
+	if (err)
+		goto out;
+
+	offset = vfs_llseek(file, offset, 0);
+	if (offset < 0) {
+		err = nfserrno((int)offset);
+		goto out_close;
+	}
+
+	/*
+	 * Read the directory entries. This silly loop is necessary because
+	 * readdir() is not guaranteed to fill up the entire buffer, but
+	 * may choose to do less.
+	 */
+
+	do {
+		cdp->err = nfserr_eof; /* will be cleared on successful read */
+		err = vfs_readdir(file, (filldir_t) func, cdp);
+	} while (err >=0 && cdp->err == nfs_ok);
+	if (err)
+		err = nfserrno(err);
+	else
+		err = cdp->err;
+	*offsetp = vfs_llseek(file, 0, 1);
+
+	if (err == nfserr_eof || err == nfserr_toosmall)
+		err = nfs_ok; /* can still be found in ->err */
+out_close:
+	nfsd_close(file);
+out:
+	return err;
+}
+
+/*
+ * Get file system stats
+ * N.B. After this call fhp needs an fh_put
+ */
+int
+nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat)
+{
+	int err = fh_verify(rqstp, fhp, 0, MAY_NOP);
+	if (!err && vfs_statfs(fhp->fh_dentry->d_inode->i_sb,stat))
+		err = nfserr_io;
+	return err;
+}
+
+/*
+ * Check for a user's access permissions to this inode.
+ */
+int
+nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc)
+{
+	struct inode	*inode = dentry->d_inode;
+	int		err;
+
+	if (acc == MAY_NOP)
+		return 0;
+#if 0
+	dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
+		acc,
+		(acc & MAY_READ)?	" read"  : "",
+		(acc & MAY_WRITE)?	" write" : "",
+		(acc & MAY_EXEC)?	" exec"  : "",
+		(acc & MAY_SATTR)?	" sattr" : "",
+		(acc & MAY_TRUNC)?	" trunc" : "",
+		(acc & MAY_LOCK)?	" lock"  : "",
+		(acc & MAY_OWNER_OVERRIDE)? " owneroverride" : "",
+		inode->i_mode,
+		IS_IMMUTABLE(inode)?	" immut" : "",
+		IS_APPEND(inode)?	" append" : "",
+		IS_RDONLY(inode)?	" ro" : "");
+	dprintk("      owner %d/%d user %d/%d\n",
+		inode->i_uid, inode->i_gid, current->fsuid, current->fsgid);
+#endif
+
+	/* Normally we reject any write/sattr etc access on a read-only file
+	 * system.  But if it is IRIX doing check on write-access for a 
+	 * device special file, we ignore rofs.
+	 */
+	if (!(acc & MAY_LOCAL_ACCESS))
+		if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) {
+			if (EX_RDONLY(exp) || IS_RDONLY(inode))
+				return nfserr_rofs;
+			if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode))
+				return nfserr_perm;
+		}
+	if ((acc & MAY_TRUNC) && IS_APPEND(inode))
+		return nfserr_perm;
+
+	if (acc & MAY_LOCK) {
+		/* If we cannot rely on authentication in NLM requests,
+		 * just allow locks, otherwise require read permission, or
+		 * ownership
+		 */
+		if (exp->ex_flags & NFSEXP_NOAUTHNLM)
+			return 0;
+		else
+			acc = MAY_READ | MAY_OWNER_OVERRIDE;
+	}
+	/*
+	 * The file owner always gets access permission for accesses that
+	 * would normally be checked at open time. This is to make
+	 * file access work even when the client has done a fchmod(fd, 0).
+	 *
+	 * However, `cp foo bar' should fail nevertheless when bar is
+	 * readonly. A sensible way to do this might be to reject all
+	 * attempts to truncate a read-only file, because a creat() call
+	 * always implies file truncation.
+	 * ... but this isn't really fair.  A process may reasonably call
+	 * ftruncate on an open file descriptor on a file with perm 000.
+	 * We must trust the client to do permission checking - using "ACCESS"
+	 * with NFSv3.
+	 */
+	if ((acc & MAY_OWNER_OVERRIDE) &&
+	    inode->i_uid == current->fsuid)
+		return 0;
+
+	err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL);
+
+	/* Allow read access to binaries even when mode 111 */
+	if (err == -EACCES && S_ISREG(inode->i_mode) &&
+	    acc == (MAY_READ | MAY_OWNER_OVERRIDE))
+		err = permission(inode, MAY_EXEC, NULL);
+
+	return err? nfserrno(err) : 0;
+}
+
+void
+nfsd_racache_shutdown(void)
+{
+	if (!raparm_cache)
+		return;
+	dprintk("nfsd: freeing readahead buffers.\n");
+	kfree(raparml);
+	raparm_cache = raparml = NULL;
+}
+/*
+ * Initialize readahead param cache
+ */
+int
+nfsd_racache_init(int cache_size)
+{
+	int	i;
+
+	if (raparm_cache)
+		return 0;
+	raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL);
+
+	if (raparml != NULL) {
+		dprintk("nfsd: allocating %d readahead buffers.\n",
+			cache_size);
+		memset(raparml, 0, sizeof(struct raparms) * cache_size);
+		for (i = 0; i < cache_size - 1; i++) {
+			raparml[i].p_next = raparml + i + 1;
+		}
+		raparm_cache = raparml;
+	} else {
+		printk(KERN_WARNING
+		       "nfsd: Could not allocate memory read-ahead cache.\n");
+		return -ENOMEM;
+	}
+	nfsdstats.ra_size = cache_size;
+	return 0;
+}
author	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
commit	1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree	0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/nfsd