diff options
| author | Mike Marshall <hubcap@omnibond.com> | 2015-07-17 10:38:15 -0400 | 
|---|---|---|
| committer | Mike Marshall <hubcap@omnibond.com> | 2015-10-03 11:39:57 -0400 | 
| commit | 1182fca3bc00441d5b2dee2f0548a3b7f978f9e7 (patch) | |
| tree | e33ca6e48fdbb2e64671b0c7bfc4a230868bb51b | |
| parent | f7be4ee07fb72a516563bc2870ef41fa589a964a (diff) | |
Orangefs: kernel client part 5
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
| -rw-r--r-- | fs/orangefs/super.c | 558 | ||||
| -rw-r--r-- | fs/orangefs/symlink.c | 31 | ||||
| -rw-r--r-- | fs/orangefs/waitqueue.c | 522 | ||||
| -rw-r--r-- | fs/orangefs/xattr.c | 532 | 
4 files changed, 1643 insertions, 0 deletions
| diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c new file mode 100644 index 000000000000..a854390fc0ea --- /dev/null +++ b/fs/orangefs/super.c @@ -0,0 +1,558 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#include "protocol.h" +#include "pvfs2-kernel.h" +#include "pvfs2-bufmap.h" + +#include <linux/parser.h> + +/* a cache for pvfs2-inode objects (i.e. pvfs2 inode private data) */ +static struct kmem_cache *pvfs2_inode_cache; + +/* list for storing pvfs2 specific superblocks in use */ +LIST_HEAD(pvfs2_superblocks); + +DEFINE_SPINLOCK(pvfs2_superblocks_lock); + +enum { +	Opt_intr, +	Opt_acl, +	Opt_local_lock, + +	Opt_err +}; + +static const match_table_t tokens = { +	{ Opt_acl,		"acl" }, +	{ Opt_intr,		"intr" }, +	{ Opt_local_lock,	"local_lock" }, +	{ Opt_err,	NULL } +}; + + +static int parse_mount_options(struct super_block *sb, char *options, +		int silent) +{ +	struct pvfs2_sb_info_s *pvfs2_sb = PVFS2_SB(sb); +	substring_t args[MAX_OPT_ARGS]; +	char *p; + +	/* +	 * Force any potential flags that might be set from the mount +	 * to zero, ie, initialize to unset. +	 */ +	sb->s_flags &= ~MS_POSIXACL; +	pvfs2_sb->flags &= ~PVFS2_OPT_INTR; +	pvfs2_sb->flags &= ~PVFS2_OPT_LOCAL_LOCK; + +	while ((p = strsep(&options, ",")) != NULL) { +		int token; + +		if (!*p) +			continue; + +		token = match_token(p, tokens, args); +		switch (token) { +		case Opt_acl: +			sb->s_flags |= MS_POSIXACL; +			break; +		case Opt_intr: +			pvfs2_sb->flags |= PVFS2_OPT_INTR; +			break; +		case Opt_local_lock: +			pvfs2_sb->flags |= PVFS2_OPT_LOCAL_LOCK; +			break; +		default: +			goto fail; +		} +	} + +	return 0; +fail: +	if (!silent) +		gossip_err("Error: mount option [%s] is not supported.\n", p); +	return -EINVAL; +} + +static void pvfs2_inode_cache_ctor(void *req) +{ +	struct pvfs2_inode_s *pvfs2_inode = req; + +	inode_init_once(&pvfs2_inode->vfs_inode); +	init_rwsem(&pvfs2_inode->xattr_sem); + +	pvfs2_inode->vfs_inode.i_version = 1; +} + +static struct inode *pvfs2_alloc_inode(struct super_block *sb) +{ +	struct pvfs2_inode_s *pvfs2_inode; + +	pvfs2_inode = kmem_cache_alloc(pvfs2_inode_cache, +				       PVFS2_CACHE_ALLOC_FLAGS); +	if (pvfs2_inode == NULL) { +		gossip_err("Failed to allocate pvfs2_inode\n"); +		return NULL; +	} + +	/* +	 * We want to clear everything except for rw_semaphore and the +	 * vfs_inode. +	 */ +	memset(&pvfs2_inode->refn.khandle, 0, 16); +	pvfs2_inode->refn.fs_id = PVFS_FS_ID_NULL; +	pvfs2_inode->last_failed_block_index_read = 0; +	memset(pvfs2_inode->link_target, 0, sizeof(pvfs2_inode->link_target)); +	pvfs2_inode->pinode_flags = 0; + +	gossip_debug(GOSSIP_SUPER_DEBUG, +		     "pvfs2_alloc_inode: allocated %p\n", +		     &pvfs2_inode->vfs_inode); +	return &pvfs2_inode->vfs_inode; +} + +static void pvfs2_destroy_inode(struct inode *inode) +{ +	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + +	gossip_debug(GOSSIP_SUPER_DEBUG, +			"%s: deallocated %p destroying inode %pU\n", +			__func__, pvfs2_inode, get_khandle_from_ino(inode)); + +	kmem_cache_free(pvfs2_inode_cache, pvfs2_inode); +} + +/* + * NOTE: information filled in here is typically reflected in the + * output of the system command 'df' +*/ +static int pvfs2_statfs(struct dentry *dentry, struct kstatfs *buf) +{ +	int ret = -ENOMEM; +	struct pvfs2_kernel_op_s *new_op = NULL; +	int flags = 0; +	struct super_block *sb = NULL; + +	sb = dentry->d_sb; + +	gossip_debug(GOSSIP_SUPER_DEBUG, +		     "pvfs2_statfs: called on sb %p (fs_id is %d)\n", +		     sb, +		     (int)(PVFS2_SB(sb)->fs_id)); + +	new_op = op_alloc(PVFS2_VFS_OP_STATFS); +	if (!new_op) +		return ret; +	new_op->upcall.req.statfs.fs_id = PVFS2_SB(sb)->fs_id; + +	if (PVFS2_SB(sb)->flags & PVFS2_OPT_INTR) +		flags = PVFS2_OP_INTERRUPTIBLE; + +	ret = service_operation(new_op, "pvfs2_statfs", flags); + +	if (new_op->downcall.status < 0) +		goto out_op_release; + +	gossip_debug(GOSSIP_SUPER_DEBUG, +		     "pvfs2_statfs: got %ld blocks available | " +		     "%ld blocks total | %ld block size\n", +		     (long)new_op->downcall.resp.statfs.blocks_avail, +		     (long)new_op->downcall.resp.statfs.blocks_total, +		     (long)new_op->downcall.resp.statfs.block_size); + +	buf->f_type = sb->s_magic; +	memcpy(&buf->f_fsid, &PVFS2_SB(sb)->fs_id, sizeof(buf->f_fsid)); +	buf->f_bsize = new_op->downcall.resp.statfs.block_size; +	buf->f_namelen = PVFS2_NAME_LEN; + +	buf->f_blocks = (sector_t) new_op->downcall.resp.statfs.blocks_total; +	buf->f_bfree = (sector_t) new_op->downcall.resp.statfs.blocks_avail; +	buf->f_bavail = (sector_t) new_op->downcall.resp.statfs.blocks_avail; +	buf->f_files = (sector_t) new_op->downcall.resp.statfs.files_total; +	buf->f_ffree = (sector_t) new_op->downcall.resp.statfs.files_avail; +	buf->f_frsize = sb->s_blocksize; + +out_op_release: +	op_release(new_op); +	gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_statfs: returning %d\n", ret); +	return ret; +} + +/* + * Remount as initiated by VFS layer.  We just need to reparse the mount + * options, no need to signal pvfs2-client-core about it. + */ +static int pvfs2_remount_fs(struct super_block *sb, int *flags, char *data) +{ +	gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_remount_fs: called\n"); +	return parse_mount_options(sb, data, 1); +} + +/* + * Remount as initiated by pvfs2-client-core on restart.  This is used to + * repopulate mount information left from previous pvfs2-client-core. + * + * the idea here is that given a valid superblock, we're + * re-initializing the user space client with the initial mount + * information specified when the super block was first initialized. + * this is very different than the first initialization/creation of a + * superblock.  we use the special service_priority_operation to make + * sure that the mount gets ahead of any other pending operation that + * is waiting for servicing.  this means that the pvfs2-client won't + * fail to start several times for all other pending operations before + * the client regains all of the mount information from us. + * NOTE: this function assumes that the request_mutex is already acquired! + */ +int pvfs2_remount(struct super_block *sb) +{ +	struct pvfs2_kernel_op_s *new_op; +	int ret = -EINVAL; + +	gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_remount: called\n"); + +	new_op = op_alloc(PVFS2_VFS_OP_FS_MOUNT); +	if (!new_op) +		return -ENOMEM; +	strncpy(new_op->upcall.req.fs_mount.pvfs2_config_server, +		PVFS2_SB(sb)->devname, +		PVFS_MAX_SERVER_ADDR_LEN); + +	gossip_debug(GOSSIP_SUPER_DEBUG, +		     "Attempting PVFS2 Remount via host %s\n", +		     new_op->upcall.req.fs_mount.pvfs2_config_server); + +	/* +	 * we assume that the calling function has already acquire the +	 * request_mutex to prevent other operations from bypassing +	 * this one +	 */ +	ret = service_operation(new_op, "pvfs2_remount", +		PVFS2_OP_PRIORITY | PVFS2_OP_NO_SEMAPHORE); +	gossip_debug(GOSSIP_SUPER_DEBUG, +		     "pvfs2_remount: mount got return value of %d\n", +		     ret); +	if (ret == 0) { +		/* +		 * store the id assigned to this sb -- it's just a +		 * short-lived mapping that the system interface uses +		 * to map this superblock to a particular mount entry +		 */ +		PVFS2_SB(sb)->id = new_op->downcall.resp.fs_mount.id; +		PVFS2_SB(sb)->mount_pending = 0; +	} + +	op_release(new_op); +	return ret; +} + +int fsid_key_table_initialize(void) +{ +	return 0; +} + +void fsid_key_table_finalize(void) +{ +} + +/* Called whenever the VFS dirties the inode in response to atime updates */ +static void pvfs2_dirty_inode(struct inode *inode, int flags) +{ +	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); + +	gossip_debug(GOSSIP_SUPER_DEBUG, +		     "pvfs2_dirty_inode: %pU\n", +		     get_khandle_from_ino(inode)); +	SetAtimeFlag(pvfs2_inode); +} + +struct super_operations pvfs2_s_ops = { +	.alloc_inode = pvfs2_alloc_inode, +	.destroy_inode = pvfs2_destroy_inode, +	.dirty_inode = pvfs2_dirty_inode, +	.drop_inode = generic_delete_inode, +	.statfs = pvfs2_statfs, +	.remount_fs = pvfs2_remount_fs, +	.show_options = generic_show_options, +}; + +struct dentry *pvfs2_fh_to_dentry(struct super_block *sb, +				  struct fid *fid, +				  int fh_len, +				  int fh_type) +{ +	struct pvfs2_object_kref refn; + +	if (fh_len < 5 || fh_type > 2) +		return NULL; + +	PVFS_khandle_from(&(refn.khandle), fid->raw, 16); +	refn.fs_id = (u32) fid->raw[4]; +	gossip_debug(GOSSIP_SUPER_DEBUG, +		     "fh_to_dentry: handle %pU, fs_id %d\n", +		     &refn.khandle, +		     refn.fs_id); + +	return d_obtain_alias(pvfs2_iget(sb, &refn)); +} + +int pvfs2_encode_fh(struct inode *inode, +		    __u32 *fh, +		    int *max_len, +		    struct inode *parent) +{ +	int len = parent ? 10 : 5; +	int type = 1; +	struct pvfs2_object_kref refn; + +	if (*max_len < len) { +		gossip_lerr("fh buffer is too small for encoding\n"); +		*max_len = len; +		type = 255; +		goto out; +	} + +	refn = PVFS2_I(inode)->refn; +	PVFS_khandle_to(&refn.khandle, fh, 16); +	fh[4] = refn.fs_id; + +	gossip_debug(GOSSIP_SUPER_DEBUG, +		     "Encoding fh: handle %pU, fsid %u\n", +		     &refn.khandle, +		     refn.fs_id); + + +	if (parent) { +		refn = PVFS2_I(parent)->refn; +		PVFS_khandle_to(&refn.khandle, (char *) fh + 20, 16); +		fh[9] = refn.fs_id; + +		type = 2; +		gossip_debug(GOSSIP_SUPER_DEBUG, +			     "Encoding parent: handle %pU, fsid %u\n", +			     &refn.khandle, +			     refn.fs_id); +	} +	*max_len = len; + +out: +	return type; +} + +static struct export_operations pvfs2_export_ops = { +	.encode_fh = pvfs2_encode_fh, +	.fh_to_dentry = pvfs2_fh_to_dentry, +}; + +int pvfs2_fill_sb(struct super_block *sb, void *data, int silent) +{ +	int ret = -EINVAL; +	struct inode *root = NULL; +	struct dentry *root_dentry = NULL; +	struct pvfs2_mount_sb_info_s *mount_sb_info = +		(struct pvfs2_mount_sb_info_s *) data; +	struct pvfs2_object_kref root_object; + +	/* alloc and init our private pvfs2 sb info */ +	sb->s_fs_info = +		kmalloc(sizeof(struct pvfs2_sb_info_s), PVFS2_GFP_FLAGS); +	if (!PVFS2_SB(sb)) +		return -ENOMEM; +	memset(sb->s_fs_info, 0, sizeof(struct pvfs2_sb_info_s)); +	PVFS2_SB(sb)->sb = sb; + +	PVFS2_SB(sb)->root_khandle = mount_sb_info->root_khandle; +	PVFS2_SB(sb)->fs_id = mount_sb_info->fs_id; +	PVFS2_SB(sb)->id = mount_sb_info->id; + +	if (mount_sb_info->data) { +		ret = parse_mount_options(sb, mount_sb_info->data, +					  silent); +		if (ret) +			return ret; +	} + +	/* Hang the xattr handlers off the superblock */ +	sb->s_xattr = pvfs2_xattr_handlers; +	sb->s_magic = PVFS2_SUPER_MAGIC; +	sb->s_op = &pvfs2_s_ops; +	sb->s_d_op = &pvfs2_dentry_operations; + +	sb->s_blocksize = pvfs_bufmap_size_query(); +	sb->s_blocksize_bits = pvfs_bufmap_shift_query(); +	sb->s_maxbytes = MAX_LFS_FILESIZE; + +	root_object.khandle = PVFS2_SB(sb)->root_khandle; +	root_object.fs_id = PVFS2_SB(sb)->fs_id; +	gossip_debug(GOSSIP_SUPER_DEBUG, +		     "get inode %pU, fsid %d\n", +		     &root_object.khandle, +		     root_object.fs_id); + +	root = pvfs2_iget(sb, &root_object); +	if (IS_ERR(root)) +		return PTR_ERR(root); + +	gossip_debug(GOSSIP_SUPER_DEBUG, +		     "Allocated root inode [%p] with mode %x\n", +		     root, +		     root->i_mode); + +	/* allocates and places root dentry in dcache */ +	root_dentry = d_make_root(root); +	if (!root_dentry) { +		iput(root); +		return -ENOMEM; +	} + +	sb->s_export_op = &pvfs2_export_ops; +	sb->s_root = root_dentry; +	return 0; +} + +struct dentry *pvfs2_mount(struct file_system_type *fst, +			   int flags, +			   const char *devname, +			   void *data) +{ +	int ret = -EINVAL; +	struct super_block *sb = ERR_PTR(-EINVAL); +	struct pvfs2_kernel_op_s *new_op; +	struct pvfs2_mount_sb_info_s mount_sb_info; +	struct dentry *mnt_sb_d = ERR_PTR(-EINVAL); + +	gossip_debug(GOSSIP_SUPER_DEBUG, +		     "pvfs2_mount: called with devname %s\n", +		     devname); + +	if (!devname) { +		gossip_err("ERROR: device name not specified.\n"); +		return ERR_PTR(-EINVAL); +	} + +	new_op = op_alloc(PVFS2_VFS_OP_FS_MOUNT); +	if (!new_op) +		return ERR_PTR(-ENOMEM); + +	strncpy(new_op->upcall.req.fs_mount.pvfs2_config_server, +		devname, +		PVFS_MAX_SERVER_ADDR_LEN); + +	gossip_debug(GOSSIP_SUPER_DEBUG, +		     "Attempting PVFS2 Mount via host %s\n", +		     new_op->upcall.req.fs_mount.pvfs2_config_server); + +	ret = service_operation(new_op, "pvfs2_mount", 0); +	gossip_debug(GOSSIP_SUPER_DEBUG, +		     "pvfs2_mount: mount got return value of %d\n", ret); +	if (ret) +		goto free_op; + +	if (new_op->downcall.resp.fs_mount.fs_id == PVFS_FS_ID_NULL) { +		gossip_err("ERROR: Retrieved null fs_id\n"); +		ret = -EINVAL; +		goto free_op; +	} + +	/* fill in temporary structure passed to fill_sb method */ +	mount_sb_info.data = data; +	mount_sb_info.root_khandle = +		new_op->downcall.resp.fs_mount.root_khandle; +	mount_sb_info.fs_id = new_op->downcall.resp.fs_mount.fs_id; +	mount_sb_info.id = new_op->downcall.resp.fs_mount.id; + +	/* +	 * the mount_sb_info structure looks odd, but it's used because +	 * the private sb info isn't allocated until we call +	 * pvfs2_fill_sb, yet we have the info we need to fill it with +	 * here.  so we store it temporarily and pass all of the info +	 * to fill_sb where it's properly copied out +	 */ +	mnt_sb_d = mount_nodev(fst, +			       flags, +			       (void *)&mount_sb_info, +			       pvfs2_fill_sb); +	if (IS_ERR(mnt_sb_d)) { +		sb = ERR_CAST(mnt_sb_d); +		goto free_op; +	} + +	sb = mnt_sb_d->d_sb; + +	/* +	 * on successful mount, store the devname and data +	 * used +	 */ +	strncpy(PVFS2_SB(sb)->devname, +		devname, +		PVFS_MAX_SERVER_ADDR_LEN); + +	/* mount_pending must be cleared */ +	PVFS2_SB(sb)->mount_pending = 0; + +	/* +	 * finally, add this sb to our list of known pvfs2 +	 * sb's +	 */ +	add_pvfs2_sb(sb); +	op_release(new_op); +	return mnt_sb_d; + +free_op: +	gossip_err("pvfs2_mount: mount request failed with %d\n", ret); +	if (ret == -EINVAL) { +		gossip_err("Ensure that all pvfs2-servers have the same FS configuration files\n"); +		gossip_err("Look at pvfs2-client-core log file (typically /tmp/pvfs2-client.log) for more details\n"); +	} + +	op_release(new_op); + +	gossip_debug(GOSSIP_SUPER_DEBUG, +		     "pvfs2_mount: returning dentry %p\n", +		     mnt_sb_d); +	return mnt_sb_d; +} + +void pvfs2_kill_sb(struct super_block *sb) +{ +	gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_kill_sb: called\n"); + +	/* +	 * issue the unmount to userspace to tell it to remove the +	 * dynamic mount info it has for this superblock +	 */ +	pvfs2_unmount_sb(sb); + +	/* remove the sb from our list of pvfs2 specific sb's */ +	remove_pvfs2_sb(sb); + +	/* provided sb cleanup */ +	kill_anon_super(sb); + +	/* free the pvfs2 superblock private data */ +	kfree(PVFS2_SB(sb)); +} + +int pvfs2_inode_cache_initialize(void) +{ +	pvfs2_inode_cache = kmem_cache_create("pvfs2_inode_cache", +					      sizeof(struct pvfs2_inode_s), +					      0, +					      PVFS2_CACHE_CREATE_FLAGS, +					      pvfs2_inode_cache_ctor); + +	if (!pvfs2_inode_cache) { +		gossip_err("Cannot create pvfs2_inode_cache\n"); +		return -ENOMEM; +	} +	return 0; +} + +int pvfs2_inode_cache_finalize(void) +{ +	kmem_cache_destroy(pvfs2_inode_cache); +	return 0; +} diff --git a/fs/orangefs/symlink.c b/fs/orangefs/symlink.c new file mode 100644 index 000000000000..2adfceff7730 --- /dev/null +++ b/fs/orangefs/symlink.c @@ -0,0 +1,31 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +#include "protocol.h" +#include "pvfs2-kernel.h" +#include "pvfs2-bufmap.h" + +static const char *pvfs2_follow_link(struct dentry *dentry, void **cookie) +{ +	char *target =  PVFS2_I(dentry->d_inode)->link_target; + +	gossip_debug(GOSSIP_INODE_DEBUG, +		     "%s: called on %s (target is %p)\n", +		     __func__, (char *)dentry->d_name.name, target); + +	*cookie = target; + +	return target; +} + +struct inode_operations pvfs2_symlink_inode_operations = { +	.readlink = generic_readlink, +	.follow_link = pvfs2_follow_link, +	.setattr = pvfs2_setattr, +	.getattr = pvfs2_getattr, +	.listxattr = pvfs2_listxattr, +	.setxattr = generic_setxattr, +}; diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c new file mode 100644 index 000000000000..9b32286a7dc4 --- /dev/null +++ b/fs/orangefs/waitqueue.c @@ -0,0 +1,522 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * (C) 2011 Omnibond Systems + * + * Changes by Acxiom Corporation to implement generic service_operation() + * function, Copyright Acxiom Corporation, 2005. + * + * See COPYING in top-level directory. + */ + +/* + *  In-kernel waitqueue operations. + */ + +#include "protocol.h" +#include "pvfs2-kernel.h" +#include "pvfs2-bufmap.h" + +/* + * What we do in this function is to walk the list of operations that are + * present in the request queue and mark them as purged. + * NOTE: This is called from the device close after client-core has + * guaranteed that no new operations could appear on the list since the + * client-core is anyway going to exit. + */ +void purge_waiting_ops(void) +{ +	struct pvfs2_kernel_op_s *op; + +	spin_lock(&pvfs2_request_list_lock); +	list_for_each_entry(op, &pvfs2_request_list, list) { +		gossip_debug(GOSSIP_WAIT_DEBUG, +			     "pvfs2-client-core: purging op tag %llu %s\n", +			     llu(op->tag), +			     get_opname_string(op)); +		spin_lock(&op->lock); +		set_op_state_purged(op); +		spin_unlock(&op->lock); +		wake_up_interruptible(&op->waitq); +	} +	spin_unlock(&pvfs2_request_list_lock); +} + +/* + * submits a PVFS2 operation and waits for it to complete + * + * Note op->downcall.status will contain the status of the operation (in + * errno format), whether provided by pvfs2-client or a result of failure to + * service the operation.  If the caller wishes to distinguish, then + * op->state can be checked to see if it was serviced or not. + * + * Returns contents of op->downcall.status for convenience + */ +int service_operation(struct pvfs2_kernel_op_s *op, +		      const char *op_name, +		      int flags) +{ +	/* flags to modify behavior */ +	sigset_t orig_sigset; +	int ret = 0; + +	/* irqflags and wait_entry are only used IF the client-core aborts */ +	unsigned long irqflags; + +	DECLARE_WAITQUEUE(wait_entry, current); + +	op->upcall.tgid = current->tgid; +	op->upcall.pid = current->pid; + +retry_servicing: +	op->downcall.status = 0; +	gossip_debug(GOSSIP_WAIT_DEBUG, +		     "pvfs2: service_operation: %s %p\n", +		     op_name, +		     op); +	gossip_debug(GOSSIP_WAIT_DEBUG, +		     "pvfs2: operation posted by process: %s, pid: %i\n", +		     current->comm, +		     current->pid); + +	/* mask out signals if this operation is not to be interrupted */ +	if (!(flags & PVFS2_OP_INTERRUPTIBLE)) +		mask_blocked_signals(&orig_sigset); + +	if (!(flags & PVFS2_OP_NO_SEMAPHORE)) { +		ret = mutex_lock_interruptible(&request_mutex); +		/* +		 * check to see if we were interrupted while waiting for +		 * semaphore +		 */ +		if (ret < 0) { +			if (!(flags & PVFS2_OP_INTERRUPTIBLE)) +				unmask_blocked_signals(&orig_sigset); +			op->downcall.status = ret; +			gossip_debug(GOSSIP_WAIT_DEBUG, +				     "pvfs2: service_operation interrupted.\n"); +			return ret; +		} +	} + +	gossip_debug(GOSSIP_WAIT_DEBUG, +		     "%s:About to call is_daemon_in_service().\n", +		     __func__); + +	if (is_daemon_in_service() < 0) { +		/* +		 * By incrementing the per-operation attempt counter, we +		 * directly go into the timeout logic while waiting for +		 * the matching downcall to be read +		 */ +		gossip_debug(GOSSIP_WAIT_DEBUG, +			     "%s:client core is NOT in service(%d).\n", +			     __func__, +			     is_daemon_in_service()); +		op->attempts++; +	} + +	/* queue up the operation */ +	if (flags & PVFS2_OP_PRIORITY) { +		add_priority_op_to_request_list(op); +	} else { +		gossip_debug(GOSSIP_WAIT_DEBUG, +			     "%s:About to call add_op_to_request_list().\n", +			     __func__); +		add_op_to_request_list(op); +	} + +	if (!(flags & PVFS2_OP_NO_SEMAPHORE)) +		mutex_unlock(&request_mutex); + +	/* +	 * If we are asked to service an asynchronous operation from +	 * VFS perspective, we are done. +	 */ +	if (flags & PVFS2_OP_ASYNC) +		return 0; + +	if (flags & PVFS2_OP_CANCELLATION) { +		gossip_debug(GOSSIP_WAIT_DEBUG, +			     "%s:" +			     "About to call wait_for_cancellation_downcall.\n", +			     __func__); +		ret = wait_for_cancellation_downcall(op); +	} else { +		ret = wait_for_matching_downcall(op); +	} + +	if (ret < 0) { +		/* failed to get matching downcall */ +		if (ret == -ETIMEDOUT) { +			gossip_err("pvfs2: %s -- wait timed out; aborting attempt.\n", +				   op_name); +		} +		op->downcall.status = ret; +	} else { +		/* got matching downcall; make sure status is in errno format */ +		op->downcall.status = +		    pvfs2_normalize_to_errno(op->downcall.status); +		ret = op->downcall.status; +	} + +	if (!(flags & PVFS2_OP_INTERRUPTIBLE)) +		unmask_blocked_signals(&orig_sigset); + +	BUG_ON(ret != op->downcall.status); +	/* retry if operation has not been serviced and if requested */ +	if (!op_state_serviced(op) && op->downcall.status == -EAGAIN) { +		gossip_debug(GOSSIP_WAIT_DEBUG, +			     "pvfs2: tag %llu (%s)" +			     " -- operation to be retried (%d attempt)\n", +			     llu(op->tag), +			     op_name, +			     op->attempts + 1); + +		if (!op->uses_shared_memory) +			/* +			 * this operation doesn't use the shared memory +			 * system +			 */ +			goto retry_servicing; + +		/* op uses shared memory */ +		if (get_bufmap_init() == 0) { +			/* +			 * This operation uses the shared memory system AND +			 * the system is not yet ready. This situation occurs +			 * when the client-core is restarted AND there were +			 * operations waiting to be processed or were already +			 * in process. +			 */ +			gossip_debug(GOSSIP_WAIT_DEBUG, +				     "uses_shared_memory is true.\n"); +			gossip_debug(GOSSIP_WAIT_DEBUG, +				     "Client core in-service status(%d).\n", +				     is_daemon_in_service()); +			gossip_debug(GOSSIP_WAIT_DEBUG, "bufmap_init:%d.\n", +				     get_bufmap_init()); +			gossip_debug(GOSSIP_WAIT_DEBUG, +				     "operation's status is 0x%0x.\n", +				     op->op_state); + +			/* +			 * let process sleep for a few seconds so shared +			 * memory system can be initialized. +			 */ +			spin_lock_irqsave(&op->lock, irqflags); +			add_wait_queue(&pvfs2_bufmap_init_waitq, &wait_entry); +			spin_unlock_irqrestore(&op->lock, irqflags); + +			set_current_state(TASK_INTERRUPTIBLE); + +			/* +			 * Wait for pvfs_bufmap_initialize() to wake me up +			 * within the allotted time. +			 */ +			ret = schedule_timeout(MSECS_TO_JIFFIES +				(1000 * PVFS2_BUFMAP_WAIT_TIMEOUT_SECS)); + +			gossip_debug(GOSSIP_WAIT_DEBUG, +				     "Value returned from schedule_timeout:" +				     "%d.\n", +				     ret); +			gossip_debug(GOSSIP_WAIT_DEBUG, +				     "Is shared memory available? (%d).\n", +				     get_bufmap_init()); + +			spin_lock_irqsave(&op->lock, irqflags); +			remove_wait_queue(&pvfs2_bufmap_init_waitq, +					  &wait_entry); +			spin_unlock_irqrestore(&op->lock, irqflags); + +			if (get_bufmap_init() == 0) { +				gossip_err("%s:The shared memory system has not started in %d seconds after the client core restarted.  Aborting user's request(%s).\n", +					   __func__, +					   PVFS2_BUFMAP_WAIT_TIMEOUT_SECS, +					   get_opname_string(op)); +				return -EIO; +			} + +			/* +			 * Return to the calling function and re-populate a +			 * shared memory buffer. +			 */ +			return -EAGAIN; +		} +	} + +	gossip_debug(GOSSIP_WAIT_DEBUG, +		     "pvfs2: service_operation %s returning: %d for %p.\n", +		     op_name, +		     ret, +		     op); +	return ret; +} + +void pvfs2_clean_up_interrupted_operation(struct pvfs2_kernel_op_s *op) +{ +	/* +	 * handle interrupted cases depending on what state we were in when +	 * the interruption is detected.  there is a coarse grained lock +	 * across the operation. +	 * +	 * NOTE: be sure not to reverse lock ordering by locking an op lock +	 * while holding the request_list lock.  Here, we first lock the op +	 * and then lock the appropriate list. +	 */ +	if (!op) { +		gossip_debug(GOSSIP_WAIT_DEBUG, +			    "%s: op is null, ignoring\n", +			     __func__); +		return; +	} + +	/* +	 * one more sanity check, make sure it's in one of the possible states +	 * or don't try to cancel it +	 */ +	if (!(op_state_waiting(op) || +	      op_state_in_progress(op) || +	      op_state_serviced(op) || +	      op_state_purged(op))) { +		gossip_debug(GOSSIP_WAIT_DEBUG, +			     "%s: op %p not in a valid state (%0x), " +			     "ignoring\n", +			     __func__, +			     op, +			     op->op_state); +		return; +	} + +	spin_lock(&op->lock); + +	if (op_state_waiting(op)) { +		/* +		 * upcall hasn't been read; remove op from upcall request +		 * list. +		 */ +		spin_unlock(&op->lock); +		remove_op_from_request_list(op); +		gossip_debug(GOSSIP_WAIT_DEBUG, +			     "Interrupted: Removed op %p from request_list\n", +			     op); +	} else if (op_state_in_progress(op)) { +		/* op must be removed from the in progress htable */ +		spin_unlock(&op->lock); +		spin_lock(&htable_ops_in_progress_lock); +		list_del(&op->list); +		spin_unlock(&htable_ops_in_progress_lock); +		gossip_debug(GOSSIP_WAIT_DEBUG, +			     "Interrupted: Removed op %p" +			     " from htable_ops_in_progress\n", +			     op); +	} else if (!op_state_serviced(op)) { +		spin_unlock(&op->lock); +		gossip_err("interrupted operation is in a weird state 0x%x\n", +			   op->op_state); +	} +} + +/* + * sleeps on waitqueue waiting for matching downcall. + * if client-core finishes servicing, then we are good to go. + * else if client-core exits, we get woken up here, and retry with a timeout + * + * Post when this call returns to the caller, the specified op will no + * longer be on any list or htable. + * + * Returns 0 on success and -errno on failure + * Errors are: + * EAGAIN in case we want the caller to requeue and try again.. + * EINTR/EIO/ETIMEDOUT indicating we are done trying to service this + * operation since client-core seems to be exiting too often + * or if we were interrupted. + */ +int wait_for_matching_downcall(struct pvfs2_kernel_op_s *op) +{ +	int ret = -EINVAL; +	DECLARE_WAITQUEUE(wait_entry, current); + +	spin_lock(&op->lock); +	add_wait_queue(&op->waitq, &wait_entry); +	spin_unlock(&op->lock); + +	while (1) { +		set_current_state(TASK_INTERRUPTIBLE); + +		spin_lock(&op->lock); +		if (op_state_serviced(op)) { +			spin_unlock(&op->lock); +			ret = 0; +			break; +		} +		spin_unlock(&op->lock); + +		if (!signal_pending(current)) { +			/* +			 * if this was our first attempt and client-core +			 * has not purged our operation, we are happy to +			 * simply wait +			 */ +			spin_lock(&op->lock); +			if (op->attempts == 0 && !op_state_purged(op)) { +				spin_unlock(&op->lock); +				schedule(); +			} else { +				spin_unlock(&op->lock); +				/* +				 * subsequent attempts, we retry exactly once +				 * with timeouts +				 */ +				if (!schedule_timeout(MSECS_TO_JIFFIES +				      (1000 * op_timeout_secs))) { +					gossip_debug(GOSSIP_WAIT_DEBUG, +						     "*** %s:" +						     " operation timed out (tag" +						     " %llu, %p, att %d)\n", +						     __func__, +						     llu(op->tag), +						     op, +						     op->attempts); +					ret = -ETIMEDOUT; +					pvfs2_clean_up_interrupted_operation +					    (op); +					break; +				} +			} +			spin_lock(&op->lock); +			op->attempts++; +			/* +			 * if the operation was purged in the meantime, it +			 * is better to requeue it afresh but ensure that +			 * we have not been purged repeatedly. This could +			 * happen if client-core crashes when an op +			 * is being serviced, so we requeue the op, client +			 * core crashes again so we requeue the op, client +			 * core starts, and so on... +			 */ +			if (op_state_purged(op)) { +				ret = (op->attempts < PVFS2_PURGE_RETRY_COUNT) ? +					 -EAGAIN : +					 -EIO; +				spin_unlock(&op->lock); +				gossip_debug(GOSSIP_WAIT_DEBUG, +					     "*** %s:" +					     " operation purged (tag " +					     "%llu, %p, att %d)\n", +					     __func__, +					     llu(op->tag), +					     op, +					     op->attempts); +				pvfs2_clean_up_interrupted_operation(op); +				break; +			} +			spin_unlock(&op->lock); +			continue; +		} + +		gossip_debug(GOSSIP_WAIT_DEBUG, +			     "*** %s:" +			     " operation interrupted by a signal (tag " +			     "%llu, op %p)\n", +			     __func__, +			     llu(op->tag), +			     op); +		pvfs2_clean_up_interrupted_operation(op); +		ret = -EINTR; +		break; +	} + +	set_current_state(TASK_RUNNING); + +	spin_lock(&op->lock); +	remove_wait_queue(&op->waitq, &wait_entry); +	spin_unlock(&op->lock); + +	return ret; +} + +/* + * similar to wait_for_matching_downcall(), but used in the special case + * of I/O cancellations. + * + * Note we need a special wait function because if this is called we already + *      know that a signal is pending in current and need to service the + *      cancellation upcall anyway.  the only way to exit this is to either + *      timeout or have the cancellation be serviced properly. + */ +int wait_for_cancellation_downcall(struct pvfs2_kernel_op_s *op) +{ +	int ret = -EINVAL; +	DECLARE_WAITQUEUE(wait_entry, current); + +	spin_lock(&op->lock); +	add_wait_queue(&op->waitq, &wait_entry); +	spin_unlock(&op->lock); + +	while (1) { +		set_current_state(TASK_INTERRUPTIBLE); + +		spin_lock(&op->lock); +		if (op_state_serviced(op)) { +			gossip_debug(GOSSIP_WAIT_DEBUG, +				     "%s:op-state is SERVICED.\n", +				     __func__); +			spin_unlock(&op->lock); +			ret = 0; +			break; +		} +		spin_unlock(&op->lock); + +		if (signal_pending(current)) { +			gossip_debug(GOSSIP_WAIT_DEBUG, +				     "%s:operation interrupted by a signal (tag" +				     " %llu, op %p)\n", +				     __func__, +				     llu(op->tag), +				     op); +			pvfs2_clean_up_interrupted_operation(op); +			ret = -EINTR; +			break; +		} + +		gossip_debug(GOSSIP_WAIT_DEBUG, +			     "%s:About to call schedule_timeout.\n", +			     __func__); +		ret = +		    schedule_timeout(MSECS_TO_JIFFIES(1000 * op_timeout_secs)); + +		gossip_debug(GOSSIP_WAIT_DEBUG, +			     "%s:Value returned from schedule_timeout(%d).\n", +			     __func__, +			     ret); +		if (!ret) { +			gossip_debug(GOSSIP_WAIT_DEBUG, +				     "%s:*** operation timed out: %p\n", +				     __func__, +				     op); +			pvfs2_clean_up_interrupted_operation(op); +			ret = -ETIMEDOUT; +			break; +		} + +		gossip_debug(GOSSIP_WAIT_DEBUG, +			     "%s:Breaking out of loop, regardless of value returned by schedule_timeout.\n", +			     __func__); +		ret = -ETIMEDOUT; +		break; +	} + +	set_current_state(TASK_RUNNING); + +	spin_lock(&op->lock); +	remove_wait_queue(&op->waitq, &wait_entry); +	spin_unlock(&op->lock); + +	gossip_debug(GOSSIP_WAIT_DEBUG, +		     "%s:returning ret(%d)\n", +		     __func__, +		     ret); + +	return ret; +} diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c new file mode 100644 index 000000000000..2766090f5ca4 --- /dev/null +++ b/fs/orangefs/xattr.c @@ -0,0 +1,532 @@ +/* + * (C) 2001 Clemson University and The University of Chicago + * + * See COPYING in top-level directory. + */ + +/* + *  Linux VFS extended attribute operations. + */ + +#include "protocol.h" +#include "pvfs2-kernel.h" +#include "pvfs2-bufmap.h" +#include <linux/posix_acl_xattr.h> +#include <linux/xattr.h> + + +#define SYSTEM_PVFS2_KEY "system.pvfs2." +#define SYSTEM_PVFS2_KEY_LEN 13 + +/* + * this function returns + *   0 if the key corresponding to name is not meant to be printed as part + *     of a listxattr. + *   1 if the key corresponding to name is meant to be returned as part of + *     a listxattr. + * The ones that start SYSTEM_PVFS2_KEY are the ones to avoid printing. + */ +static int is_reserved_key(const char *key, size_t size) +{ + +	if (size < SYSTEM_PVFS2_KEY_LEN) +		return 1; + +	return strncmp(key, SYSTEM_PVFS2_KEY, SYSTEM_PVFS2_KEY_LEN) ?  1 : 0; +} + +static inline int convert_to_internal_xattr_flags(int setxattr_flags) +{ +	int internal_flag = 0; + +	if (setxattr_flags & XATTR_REPLACE) { +		/* Attribute must exist! */ +		internal_flag = PVFS_XATTR_REPLACE; +	} else if (setxattr_flags & XATTR_CREATE) { +		/* Attribute must not exist */ +		internal_flag = PVFS_XATTR_CREATE; +	} +	return internal_flag; +} + + +/* + * Tries to get a specified key's attributes of a given + * file into a user-specified buffer. Note that the getxattr + * interface allows for the users to probe the size of an + * extended attribute by passing in a value of 0 to size. + * Thus our return value is always the size of the attribute + * unless the key does not exist for the file and/or if + * there were errors in fetching the attribute value. + */ +ssize_t pvfs2_inode_getxattr(struct inode *inode, const char *prefix, +		const char *name, void *buffer, size_t size) +{ +	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); +	struct pvfs2_kernel_op_s *new_op = NULL; +	ssize_t ret = -ENOMEM; +	ssize_t length = 0; +	int fsuid; +	int fsgid; + +	gossip_debug(GOSSIP_XATTR_DEBUG, +		     "%s: prefix %s name %s, buffer_size %zd\n", +		     __func__, prefix, name, size); + +	if (name == NULL || (size > 0 && buffer == NULL)) { +		gossip_err("pvfs2_inode_getxattr: bogus NULL pointers\n"); +		return -EINVAL; +	} +	if (size < 0 || +	    (strlen(name) + strlen(prefix)) >= PVFS_MAX_XATTR_NAMELEN) { +		gossip_err("Invalid size (%d) or key length (%d)\n", +			   (int)size, +			   (int)(strlen(name) + strlen(prefix))); +		return -EINVAL; +	} + +	fsuid = from_kuid(current_user_ns(), current_fsuid()); +	fsgid = from_kgid(current_user_ns(), current_fsgid()); + +	gossip_debug(GOSSIP_XATTR_DEBUG, +		     "getxattr on inode %pU, name %s " +		     "(uid %o, gid %o)\n", +		     get_khandle_from_ino(inode), +		     name, +		     fsuid, +		     fsgid); + +	down_read(&pvfs2_inode->xattr_sem); + +	new_op = op_alloc(PVFS2_VFS_OP_GETXATTR); +	if (!new_op) +		goto out_unlock; + +	new_op->upcall.req.getxattr.refn = pvfs2_inode->refn; +	ret = snprintf((char *)new_op->upcall.req.getxattr.key, +		       PVFS_MAX_XATTR_NAMELEN, "%s%s", prefix, name); + +	/* +	 * NOTE: Although keys are meant to be NULL terminated textual +	 * strings, I am going to explicitly pass the length just in case +	 * we change this later on... +	 */ +	new_op->upcall.req.getxattr.key_sz = ret + 1; + +	ret = service_operation(new_op, "pvfs2_inode_getxattr", +				get_interruptible_flag(inode)); +	if (ret != 0) { +		if (ret == -ENOENT) { +			ret = -ENODATA; +			gossip_debug(GOSSIP_XATTR_DEBUG, +				     "pvfs2_inode_getxattr: inode %pU key %s" +				     " does not exist!\n", +				     get_khandle_from_ino(inode), +				     (char *)new_op->upcall.req.getxattr.key); +		} +		goto out_release_op; +	} + +	/* +	 * Length returned includes null terminator. +	 */ +	length = new_op->downcall.resp.getxattr.val_sz; + +	/* +	 * Just return the length of the queried attribute. +	 */ +	if (size == 0) { +		ret = length; +		goto out_release_op; +	} + +	/* +	 * Check to see if key length is > provided buffer size. +	 */ +	if (length > size) { +		ret = -ERANGE; +		goto out_release_op; +	} + +	memset(buffer, 0, size); +	memcpy(buffer, new_op->downcall.resp.getxattr.val, length); +	gossip_debug(GOSSIP_XATTR_DEBUG, +	     "pvfs2_inode_getxattr: inode %pU " +	     "key %s key_sz %d, val_len %d\n", +	     get_khandle_from_ino(inode), +	     (char *)new_op-> +		upcall.req.getxattr.key, +		     (int)new_op-> +		upcall.req.getxattr.key_sz, +	     (int)ret); + +	ret = length; + +out_release_op: +	op_release(new_op); +out_unlock: +	up_read(&pvfs2_inode->xattr_sem); +	return ret; +} + +static int pvfs2_inode_removexattr(struct inode *inode, +			    const char *prefix, +			    const char *name, +			    int flags) +{ +	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); +	struct pvfs2_kernel_op_s *new_op = NULL; +	int ret = -ENOMEM; + +	down_write(&pvfs2_inode->xattr_sem); +	new_op = op_alloc(PVFS2_VFS_OP_REMOVEXATTR); +	if (!new_op) +		goto out_unlock; + +	new_op->upcall.req.removexattr.refn = pvfs2_inode->refn; +	/* +	 * NOTE: Although keys are meant to be NULL terminated +	 * textual strings, I am going to explicitly pass the +	 * length just in case we change this later on... +	 */ +	ret = snprintf((char *)new_op->upcall.req.removexattr.key, +		       PVFS_MAX_XATTR_NAMELEN, +		       "%s%s", +		       (prefix ? prefix : ""), +		       name); +	new_op->upcall.req.removexattr.key_sz = ret + 1; + +	gossip_debug(GOSSIP_XATTR_DEBUG, +		     "pvfs2_inode_removexattr: key %s, key_sz %d\n", +		     (char *)new_op->upcall.req.removexattr.key, +		     (int)new_op->upcall.req.removexattr.key_sz); + +	ret = service_operation(new_op, +				"pvfs2_inode_removexattr", +				get_interruptible_flag(inode)); +	if (ret == -ENOENT) { +		/* +		 * Request to replace a non-existent attribute is an error. +		 */ +		if (flags & XATTR_REPLACE) +			ret = -ENODATA; +		else +			ret = 0; +	} + +	gossip_debug(GOSSIP_XATTR_DEBUG, +		     "pvfs2_inode_removexattr: returning %d\n", ret); + +	op_release(new_op); +out_unlock: +	up_write(&pvfs2_inode->xattr_sem); +	return ret; +} + +/* + * Tries to set an attribute for a given key on a file. + * + * Returns a -ve number on error and 0 on success.  Key is text, but value + * can be binary! + */ +int pvfs2_inode_setxattr(struct inode *inode, const char *prefix, +		const char *name, const void *value, size_t size, int flags) +{ +	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); +	struct pvfs2_kernel_op_s *new_op; +	int internal_flag = 0; +	int ret = -ENOMEM; + +	gossip_debug(GOSSIP_XATTR_DEBUG, +		     "%s: prefix %s, name %s, buffer_size %zd\n", +		     __func__, prefix, name, size); + +	if (size < 0 || +	    size >= PVFS_MAX_XATTR_VALUELEN || +	    flags < 0) { +		gossip_err("pvfs2_inode_setxattr: bogus values of size(%d), flags(%d)\n", +			   (int)size, +			   flags); +		return -EINVAL; +	} + +	if (name == NULL || +	    (size > 0 && value == NULL)) { +		gossip_err("pvfs2_inode_setxattr: bogus NULL pointers!\n"); +		return -EINVAL; +	} + +	internal_flag = convert_to_internal_xattr_flags(flags); + +	if (prefix) { +		if (strlen(name) + strlen(prefix) >= PVFS_MAX_XATTR_NAMELEN) { +			gossip_err +			    ("pvfs2_inode_setxattr: bogus key size (%d)\n", +			     (int)(strlen(name) + strlen(prefix))); +			return -EINVAL; +		} +	} else { +		if (strlen(name) >= PVFS_MAX_XATTR_NAMELEN) { +			gossip_err +			    ("pvfs2_inode_setxattr: bogus key size (%d)\n", +			     (int)(strlen(name))); +			return -EINVAL; +		} +	} + +	/* This is equivalent to a removexattr */ +	if (size == 0 && value == NULL) { +		gossip_debug(GOSSIP_XATTR_DEBUG, +			     "removing xattr (%s%s)\n", +			     prefix, +			     name); +		return pvfs2_inode_removexattr(inode, prefix, name, flags); +	} + +	gossip_debug(GOSSIP_XATTR_DEBUG, +		     "setxattr on inode %pU, name %s\n", +		     get_khandle_from_ino(inode), +		     name); + +	down_write(&pvfs2_inode->xattr_sem); +	new_op = op_alloc(PVFS2_VFS_OP_SETXATTR); +	if (!new_op) +		goto out_unlock; + + +	new_op->upcall.req.setxattr.refn = pvfs2_inode->refn; +	new_op->upcall.req.setxattr.flags = internal_flag; +	/* +	 * NOTE: Although keys are meant to be NULL terminated textual +	 * strings, I am going to explicitly pass the length just in +	 * case we change this later on... +	 */ +	ret = snprintf((char *)new_op->upcall.req.setxattr.keyval.key, +		       PVFS_MAX_XATTR_NAMELEN, +		       "%s%s", +		       prefix, name); +	new_op->upcall.req.setxattr.keyval.key_sz = ret + 1; +	memcpy(new_op->upcall.req.setxattr.keyval.val, value, size); +	new_op->upcall.req.setxattr.keyval.val_sz = size; + +	gossip_debug(GOSSIP_XATTR_DEBUG, +		     "pvfs2_inode_setxattr: key %s, key_sz %d " +		     " value size %zd\n", +		     (char *)new_op->upcall.req.setxattr.keyval.key, +		     (int)new_op->upcall.req.setxattr.keyval.key_sz, +		     size); + +	ret = service_operation(new_op, +				"pvfs2_inode_setxattr", +				get_interruptible_flag(inode)); + +	gossip_debug(GOSSIP_XATTR_DEBUG, +		     "pvfs2_inode_setxattr: returning %d\n", +		     ret); + +	/* when request is serviced properly, free req op struct */ +	op_release(new_op); +out_unlock: +	up_write(&pvfs2_inode->xattr_sem); +	return ret; +} + +/* + * Tries to get a specified object's keys into a user-specified buffer of a + * given size.  Note that like the previous instances of xattr routines, this + * also allows you to pass in a NULL pointer and 0 size to probe the size for + * subsequent memory allocations. Thus our return value is always the size of + * all the keys unless there were errors in fetching the keys! + */ +ssize_t pvfs2_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ +	struct inode *inode = dentry->d_inode; +	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); +	struct pvfs2_kernel_op_s *new_op; +	__u64 token = PVFS_ITERATE_START; +	ssize_t ret = -ENOMEM; +	ssize_t total = 0; +	ssize_t length = 0; +	int count_keys = 0; +	int key_size; +	int i = 0; + +	if (size > 0 && buffer == NULL) { +		gossip_err("%s: bogus NULL pointers\n", __func__); +		return -EINVAL; +	} +	if (size < 0) { +		gossip_err("Invalid size (%d)\n", (int)size); +		return -EINVAL; +	} + +	down_read(&pvfs2_inode->xattr_sem); +	new_op = op_alloc(PVFS2_VFS_OP_LISTXATTR); +	if (!new_op) +		goto out_unlock; + +	if (buffer && size > 0) +		memset(buffer, 0, size); + +try_again: +	key_size = 0; +	new_op->upcall.req.listxattr.refn = pvfs2_inode->refn; +	new_op->upcall.req.listxattr.token = token; +	new_op->upcall.req.listxattr.requested_count = +	    (size == 0) ? 0 : PVFS_MAX_XATTR_LISTLEN; +	ret = service_operation(new_op, __func__, +				get_interruptible_flag(inode)); +	if (ret != 0) +		goto done; + +	if (size == 0) { +		/* +		 * This is a bit of a big upper limit, but I did not want to +		 * spend too much time getting this correct, since users end +		 * up allocating memory rather than us... +		 */ +		total = new_op->downcall.resp.listxattr.returned_count * +			PVFS_MAX_XATTR_NAMELEN; +		goto done; +	} + +	length = new_op->downcall.resp.listxattr.keylen; +	if (length == 0) +		goto done; + +	/* +	 * Check to see how much can be fit in the buffer. Fit only whole keys. +	 */ +	for (i = 0; i < new_op->downcall.resp.listxattr.returned_count; i++) { +		if (total + new_op->downcall.resp.listxattr.lengths[i] > size) +			goto done; + +		/* +		 * Since many dumb programs try to setxattr() on our reserved +		 * xattrs this is a feeble attempt at defeating those by not +		 * listing them in the output of listxattr.. sigh +		 */ +		if (is_reserved_key(new_op->downcall.resp.listxattr.key + +				    key_size, +				    new_op->downcall.resp. +					listxattr.lengths[i])) { +			gossip_debug(GOSSIP_XATTR_DEBUG, "Copying key %d -> %s\n", +					i, new_op->downcall.resp.listxattr.key + +						key_size); +			memcpy(buffer + total, +				new_op->downcall.resp.listxattr.key + key_size, +				new_op->downcall.resp.listxattr.lengths[i]); +			total += new_op->downcall.resp.listxattr.lengths[i]; +			count_keys++; +		} else { +			gossip_debug(GOSSIP_XATTR_DEBUG, "[RESERVED] key %d -> %s\n", +					i, new_op->downcall.resp.listxattr.key + +						key_size); +		} +		key_size += new_op->downcall.resp.listxattr.lengths[i]; +	} + +	/* +	 * Since the buffer was large enough, we might have to continue +	 * fetching more keys! +	 */ +	token = new_op->downcall.resp.listxattr.token; +	if (token != PVFS_ITERATE_END) +		goto try_again; + +done: +	gossip_debug(GOSSIP_XATTR_DEBUG, "%s: returning %d" +		     " [size of buffer %ld] (filled in %d keys)\n", +		     __func__, +		     ret ? (int)ret : (int)total, +		     (long)size, +		     count_keys); +	op_release(new_op); +	if (ret == 0) +		ret = total; +out_unlock: +	up_read(&pvfs2_inode->xattr_sem); +	return ret; +} + +int pvfs2_xattr_set_default(struct dentry *dentry, +			    const char *name, +			    const void *buffer, +			    size_t size, +			    int flags, +			    int handler_flags) +{ +	return pvfs2_inode_setxattr(dentry->d_inode, +				    PVFS2_XATTR_NAME_DEFAULT_PREFIX, +				    name, +				    buffer, +				    size, +				    flags); +} + +int pvfs2_xattr_get_default(struct dentry *dentry, +			    const char *name, +			    void *buffer, +			    size_t size, +			    int handler_flags) +{ +	return pvfs2_inode_getxattr(dentry->d_inode, +				    PVFS2_XATTR_NAME_DEFAULT_PREFIX, +				    name, +				    buffer, +				    size); + +} + +static int pvfs2_xattr_set_trusted(struct dentry *dentry, +			    const char *name, +			    const void *buffer, +			    size_t size, +			    int flags, +			    int handler_flags) +{ +	return pvfs2_inode_setxattr(dentry->d_inode, +				    PVFS2_XATTR_NAME_TRUSTED_PREFIX, +				    name, +				    buffer, +				    size, +				    flags); +} + +static int pvfs2_xattr_get_trusted(struct dentry *dentry, +			    const char *name, +			    void *buffer, +			    size_t size, +			    int handler_flags) +{ +	return pvfs2_inode_getxattr(dentry->d_inode, +				    PVFS2_XATTR_NAME_TRUSTED_PREFIX, +				    name, +				    buffer, +				    size); +} + +static struct xattr_handler pvfs2_xattr_trusted_handler = { +	.prefix = PVFS2_XATTR_NAME_TRUSTED_PREFIX, +	.get = pvfs2_xattr_get_trusted, +	.set = pvfs2_xattr_set_trusted, +}; + +static struct xattr_handler pvfs2_xattr_default_handler = { +	/* +	 * NOTE: this is set to be the empty string. +	 * so that all un-prefixed xattrs keys get caught +	 * here! +	 */ +	.prefix = PVFS2_XATTR_NAME_DEFAULT_PREFIX, +	.get = pvfs2_xattr_get_default, +	.set = pvfs2_xattr_set_default, +}; + +const struct xattr_handler *pvfs2_xattr_handlers[] = { +	&posix_acl_access_xattr_handler, +	&posix_acl_default_xattr_handler, +	&pvfs2_xattr_trusted_handler, +	&pvfs2_xattr_default_handler, +	NULL +}; | 
