summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c22
-rw-r--r--fs/9p/acl.h6
-rw-r--r--fs/9p/cache.c20
-rw-r--r--fs/9p/cache.h9
-rw-r--r--fs/9p/v9fs.c45
-rw-r--r--fs/9p/v9fs.h29
-rw-r--r--fs/9p/v9fs_vfs.h6
-rw-r--r--fs/9p/vfs_file.c36
-rw-r--r--fs/9p/vfs_inode.c177
-rw-r--r--fs/9p/vfs_inode_dotl.c162
-rw-r--r--fs/9p/vfs_super.c2
-rw-r--r--fs/befs/linuxvfs.c23
-rw-r--r--fs/block_dev.c7
-rw-r--r--fs/btrfs/extent-tree.c12
-rw-r--r--fs/btrfs/inode.c5
-rw-r--r--fs/btrfs/tree-log.c28
-rw-r--r--fs/btrfs/volumes.c17
-rw-r--r--fs/btrfs/volumes.h2
-rw-r--r--fs/cifs/cifsfs.c6
-rw-r--r--fs/cifs/cifssmb.c3
-rw-r--r--fs/cifs/connect.c7
-rw-r--r--fs/cifs/dir.c4
-rw-r--r--fs/cifs/inode.c14
-rw-r--r--fs/cifs/transport.c2
-rw-r--r--fs/ecryptfs/main.c23
-rw-r--r--fs/ecryptfs/read_write.c18
-rw-r--r--fs/exec.c2
-rw-r--r--fs/ext3/namei.c6
-rw-r--r--fs/ext4/ext4_jbd2.h4
-rw-r--r--fs/ext4/inode.c23
-rw-r--r--fs/ext4/namei.c6
-rw-r--r--fs/ext4/page-io.c6
-rw-r--r--fs/ext4/super.c1
-rw-r--r--fs/fs-writeback.c26
-rw-r--r--fs/fuse/dev.c16
-rw-r--r--fs/hfsplus/hfsplus_fs.h16
-rw-r--r--fs/hfsplus/part_tbl.c32
-rw-r--r--fs/hfsplus/super.c16
-rw-r--r--fs/hfsplus/wrapper.c87
-rw-r--r--fs/namei.c4
-rw-r--r--fs/nfs/callback.h2
-rw-r--r--fs/nfs/callback_proc.c25
-rw-r--r--fs/nfs/callback_xdr.c24
-rw-r--r--fs/nfs/objlayout/objio_osd.c28
-rw-r--r--fs/nfs/objlayout/pnfs_osd_xdr_cli.c3
-rw-r--r--fs/proc/task_mmu.c80
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c48
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c10
-rw-r--r--fs/xfs/xfs_buf_item.c3
-rw-r--r--fs/xfs/xfs_inode_item.c10
-rw-r--r--fs/xfs/xfs_trans.c27
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_ail.c192
-rw-r--r--fs/xfs/xfs_trans_priv.h18
55 files changed, 969 insertions, 435 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 535ab6eccb1a..4a866cd06287 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -185,12 +185,15 @@ int v9fs_acl_chmod(struct dentry *dentry)
}
int v9fs_set_create_acl(struct dentry *dentry,
- struct posix_acl *dpacl, struct posix_acl *pacl)
+ struct posix_acl **dpacl, struct posix_acl **pacl)
{
- v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl);
- v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
- posix_acl_release(dpacl);
- posix_acl_release(pacl);
+ if (dentry) {
+ v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, *dpacl);
+ v9fs_set_acl(dentry, ACL_TYPE_ACCESS, *pacl);
+ }
+ posix_acl_release(*dpacl);
+ posix_acl_release(*pacl);
+ *dpacl = *pacl = NULL;
return 0;
}
@@ -212,11 +215,11 @@ int v9fs_acl_mode(struct inode *dir, mode_t *modep,
struct posix_acl *clone;
if (S_ISDIR(mode))
- *dpacl = acl;
+ *dpacl = posix_acl_dup(acl);
clone = posix_acl_clone(acl, GFP_NOFS);
- retval = -ENOMEM;
+ posix_acl_release(acl);
if (!clone)
- goto cleanup;
+ return -ENOMEM;
retval = posix_acl_create_masq(clone, &mode);
if (retval < 0) {
@@ -225,11 +228,12 @@ int v9fs_acl_mode(struct inode *dir, mode_t *modep,
}
if (retval > 0)
*pacl = clone;
+ else
+ posix_acl_release(clone);
}
*modep = mode;
return 0;
cleanup:
- posix_acl_release(acl);
return retval;
}
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
index 7ef3ac9f6d95..c47ea9cf3031 100644
--- a/fs/9p/acl.h
+++ b/fs/9p/acl.h
@@ -19,7 +19,7 @@ extern int v9fs_get_acl(struct inode *, struct p9_fid *);
extern int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags);
extern int v9fs_acl_chmod(struct dentry *);
extern int v9fs_set_create_acl(struct dentry *,
- struct posix_acl *, struct posix_acl *);
+ struct posix_acl **, struct posix_acl **);
extern int v9fs_acl_mode(struct inode *dir, mode_t *modep,
struct posix_acl **dpacl, struct posix_acl **pacl);
#else
@@ -33,8 +33,8 @@ static inline int v9fs_acl_chmod(struct dentry *dentry)
return 0;
}
static inline int v9fs_set_create_acl(struct dentry *dentry,
- struct posix_acl *dpacl,
- struct posix_acl *pacl)
+ struct posix_acl **dpacl,
+ struct posix_acl **pacl)
{
return 0;
}
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 5b335c5086a1..945aa5f02f9b 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -108,11 +108,10 @@ static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data,
void *buffer, uint16_t bufmax)
{
const struct v9fs_inode *v9inode = cookie_netfs_data;
- memcpy(buffer, &v9inode->fscache_key->path,
- sizeof(v9inode->fscache_key->path));
+ memcpy(buffer, &v9inode->qid.path, sizeof(v9inode->qid.path));
P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &v9inode->vfs_inode,
- v9inode->fscache_key->path);
- return sizeof(v9inode->fscache_key->path);
+ v9inode->qid.path);
+ return sizeof(v9inode->qid.path);
}
static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data,
@@ -129,11 +128,10 @@ static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data,
void *buffer, uint16_t buflen)
{
const struct v9fs_inode *v9inode = cookie_netfs_data;
- memcpy(buffer, &v9inode->fscache_key->version,
- sizeof(v9inode->fscache_key->version));
+ memcpy(buffer, &v9inode->qid.version, sizeof(v9inode->qid.version));
P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &v9inode->vfs_inode,
- v9inode->fscache_key->version);
- return sizeof(v9inode->fscache_key->version);
+ v9inode->qid.version);
+ return sizeof(v9inode->qid.version);
}
static enum
@@ -143,11 +141,11 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
{
const struct v9fs_inode *v9inode = cookie_netfs_data;
- if (buflen != sizeof(v9inode->fscache_key->version))
+ if (buflen != sizeof(v9inode->qid.version))
return FSCACHE_CHECKAUX_OBSOLETE;
- if (memcmp(buffer, &v9inode->fscache_key->version,
- sizeof(v9inode->fscache_key->version)))
+ if (memcmp(buffer, &v9inode->qid.version,
+ sizeof(v9inode->qid.version)))
return FSCACHE_CHECKAUX_OBSOLETE;
return FSCACHE_CHECKAUX_OKAY;
diff --git a/fs/9p/cache.h b/fs/9p/cache.h
index 049507a5b01c..40cc54ced5d9 100644
--- a/fs/9p/cache.h
+++ b/fs/9p/cache.h
@@ -93,15 +93,6 @@ static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
BUG_ON(PageFsCache(page));
}
-static inline void v9fs_fscache_set_key(struct inode *inode,
- struct p9_qid *qid)
-{
- struct v9fs_inode *v9inode = V9FS_I(inode);
- spin_lock(&v9inode->fscache_lock);
- v9inode->fscache_key = qid;
- spin_unlock(&v9inode->fscache_lock);
-}
-
static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
struct page *page)
{
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index c82b017f51f3..ef9661886112 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -78,6 +78,25 @@ static const match_table_t tokens = {
{Opt_err, NULL}
};
+/* Interpret mount options for cache mode */
+static int get_cache_mode(char *s)
+{
+ int version = -EINVAL;
+
+ if (!strcmp(s, "loose")) {
+ version = CACHE_LOOSE;
+ P9_DPRINTK(P9_DEBUG_9P, "Cache mode: loose\n");
+ } else if (!strcmp(s, "fscache")) {
+ version = CACHE_FSCACHE;
+ P9_DPRINTK(P9_DEBUG_9P, "Cache mode: fscache\n");
+ } else if (!strcmp(s, "none")) {
+ version = CACHE_NONE;
+ P9_DPRINTK(P9_DEBUG_9P, "Cache mode: none\n");
+ } else
+ printk(KERN_INFO "9p: Unknown Cache mode %s.\n", s);
+ return version;
+}
+
/**
* v9fs_parse_options - parse mount options into session structure
* @v9ses: existing v9fs session information
@@ -97,7 +116,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
/* setup defaults */
v9ses->afid = ~0;
v9ses->debug = 0;
- v9ses->cache = 0;
+ v9ses->cache = CACHE_NONE;
#ifdef CONFIG_9P_FSCACHE
v9ses->cachetag = NULL;
#endif
@@ -171,13 +190,13 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
"problem allocating copy of cache arg\n");
goto free_and_return;
}
+ ret = get_cache_mode(s);
+ if (ret == -EINVAL) {
+ kfree(s);
+ goto free_and_return;
+ }
- if (strcmp(s, "loose") == 0)
- v9ses->cache = CACHE_LOOSE;
- else if (strcmp(s, "fscache") == 0)
- v9ses->cache = CACHE_FSCACHE;
- else
- v9ses->cache = CACHE_NONE;
+ v9ses->cache = ret;
kfree(s);
break;
@@ -200,9 +219,15 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
} else {
v9ses->flags |= V9FS_ACCESS_SINGLE;
v9ses->uid = simple_strtoul(s, &e, 10);
- if (*e != '\0')
- v9ses->uid = ~0;
+ if (*e != '\0') {
+ ret = -EINVAL;
+ printk(KERN_INFO "9p: Unknown access "
+ "argument %s.\n", s);
+ kfree(s);
+ goto free_and_return;
+ }
}
+
kfree(s);
break;
@@ -487,8 +512,8 @@ static void v9fs_inode_init_once(void *foo)
struct v9fs_inode *v9inode = (struct v9fs_inode *)foo;
#ifdef CONFIG_9P_FSCACHE
v9inode->fscache = NULL;
- v9inode->fscache_key = NULL;
#endif
+ memset(&v9inode->qid, 0, sizeof(v9inode->qid));
inode_init_once(&v9inode->vfs_inode);
}
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index e5ebedfc5ed8..e78956cbd702 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -125,8 +125,8 @@ struct v9fs_inode {
#ifdef CONFIG_9P_FSCACHE
spinlock_t fscache_lock;
struct fscache_cookie *fscache;
- struct p9_qid *fscache_key;
#endif
+ struct p9_qid qid;
unsigned int cache_validity;
struct p9_fid *writeback_fid;
struct mutex v_mutex;
@@ -153,13 +153,13 @@ extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd,
void *p);
extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses,
struct p9_fid *fid,
- struct super_block *sb);
+ struct super_block *sb, int new);
extern const struct inode_operations v9fs_dir_inode_operations_dotl;
extern const struct inode_operations v9fs_file_inode_operations_dotl;
extern const struct inode_operations v9fs_symlink_inode_operations_dotl;
extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses,
struct p9_fid *fid,
- struct super_block *sb);
+ struct super_block *sb, int new);
/* other default globals */
#define V9FS_PORT 564
@@ -201,8 +201,27 @@ v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
struct super_block *sb)
{
if (v9fs_proto_dotl(v9ses))
- return v9fs_inode_from_fid_dotl(v9ses, fid, sb);
+ return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 0);
else
- return v9fs_inode_from_fid(v9ses, fid, sb);
+ return v9fs_inode_from_fid(v9ses, fid, sb, 0);
}
+
+/**
+ * v9fs_get_new_inode_from_fid - Helper routine to populate an inode by
+ * issuing a attribute request
+ * @v9ses: session information
+ * @fid: fid to issue attribute request for
+ * @sb: superblock on which to create inode
+ *
+ */
+static inline struct inode *
+v9fs_get_new_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
+{
+ if (v9fs_proto_dotl(v9ses))
+ return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 1);
+ else
+ return v9fs_inode_from_fid(v9ses, fid, sb, 1);
+}
+
#endif
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 4014160903a9..f9a28eab7816 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -54,9 +54,9 @@ extern struct kmem_cache *v9fs_inode_cache;
struct inode *v9fs_alloc_inode(struct super_block *sb);
void v9fs_destroy_inode(struct inode *inode);
-struct inode *v9fs_get_inode(struct super_block *sb, int mode);
+struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t);
int v9fs_init_inode(struct v9fs_session_info *v9ses,
- struct inode *inode, int mode);
+ struct inode *inode, int mode, dev_t);
void v9fs_evict_inode(struct inode *inode);
ino_t v9fs_qid2ino(struct p9_qid *qid);
void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
@@ -82,4 +82,6 @@ static inline void v9fs_invalidate_inode_attr(struct inode *inode)
v9inode->cache_validity |= V9FS_INO_INVALID_ATTR;
return;
}
+
+int v9fs_open_to_dotl_flags(int flags);
#endif
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index ffed55817f0c..9d6e1685d922 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -65,7 +65,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
v9inode = V9FS_I(inode);
v9ses = v9fs_inode2v9ses(inode);
if (v9fs_proto_dotl(v9ses))
- omode = file->f_flags;
+ omode = v9fs_open_to_dotl_flags(file->f_flags);
else
omode = v9fs_uflags2omode(file->f_flags,
v9fs_proto_dotu(v9ses));
@@ -169,7 +169,18 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
/* convert posix lock to p9 tlock args */
memset(&flock, 0, sizeof(flock));
- flock.type = fl->fl_type;
+ /* map the lock type */
+ switch (fl->fl_type) {
+ case F_RDLCK:
+ flock.type = P9_LOCK_TYPE_RDLCK;
+ break;
+ case F_WRLCK:
+ flock.type = P9_LOCK_TYPE_WRLCK;
+ break;
+ case F_UNLCK:
+ flock.type = P9_LOCK_TYPE_UNLCK;
+ break;
+ }
flock.start = fl->fl_start;
if (fl->fl_end == OFFSET_MAX)
flock.length = 0;
@@ -245,7 +256,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
/* convert posix lock to p9 tgetlock args */
memset(&glock, 0, sizeof(glock));
- glock.type = fl->fl_type;
+ glock.type = P9_LOCK_TYPE_UNLCK;
glock.start = fl->fl_start;
if (fl->fl_end == OFFSET_MAX)
glock.length = 0;
@@ -257,17 +268,26 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
res = p9_client_getlock_dotl(fid, &glock);
if (res < 0)
return res;
- if (glock.type != F_UNLCK) {
- fl->fl_type = glock.type;
+ /* map 9p lock type to os lock type */
+ switch (glock.type) {
+ case P9_LOCK_TYPE_RDLCK:
+ fl->fl_type = F_RDLCK;
+ break;
+ case P9_LOCK_TYPE_WRLCK:
+ fl->fl_type = F_WRLCK;
+ break;
+ case P9_LOCK_TYPE_UNLCK:
+ fl->fl_type = F_UNLCK;
+ break;
+ }
+ if (glock.type != P9_LOCK_TYPE_UNLCK) {
fl->fl_start = glock.start;
if (glock.length == 0)
fl->fl_end = OFFSET_MAX;
else
fl->fl_end = glock.start + glock.length - 1;
fl->fl_pid = glock.proc_id;
- } else
- fl->fl_type = F_UNLCK;
-
+ }
return res;
}
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 7f6c67703195..c72e20cdfb9a 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -95,15 +95,18 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
/**
* p9mode2unixmode- convert plan9 mode bits to unix mode bits
* @v9ses: v9fs session information
- * @mode: mode to convert
+ * @stat: p9_wstat from which mode need to be derived
+ * @rdev: major number, minor number in case of device files.
*
*/
-
-static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
+static int p9mode2unixmode(struct v9fs_session_info *v9ses,
+ struct p9_wstat *stat, dev_t *rdev)
{
int res;
+ int mode = stat->mode;
- res = mode & 0777;
+ res = mode & S_IALLUGO;
+ *rdev = 0;
if ((mode & P9_DMDIR) == P9_DMDIR)
res |= S_IFDIR;
@@ -116,9 +119,26 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
&& (v9ses->nodev == 0))
res |= S_IFIFO;
else if ((mode & P9_DMDEVICE) && (v9fs_proto_dotu(v9ses))
- && (v9ses->nodev == 0))
- res |= S_IFBLK;
- else
+ && (v9ses->nodev == 0)) {
+ char type = 0, ext[32];
+ int major = -1, minor = -1;
+
+ strncpy(ext, stat->extension, sizeof(ext));
+ sscanf(ext, "%c %u %u", &type, &major, &minor);
+ switch (type) {
+ case 'c':
+ res |= S_IFCHR;
+ break;
+ case 'b':
+ res |= S_IFBLK;
+ break;
+ default:
+ P9_DPRINTK(P9_DEBUG_ERROR,
+ "Unknown special type %c %s\n", type,
+ stat->extension);
+ };
+ *rdev = MKDEV(major, minor);
+ } else
res |= S_IFREG;
if (v9fs_proto_dotu(v9ses)) {
@@ -131,7 +151,6 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
if ((mode & P9_DMSETVTX) == P9_DMSETVTX)
res |= S_ISVTX;
}
-
return res;
}
@@ -216,7 +235,6 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
return NULL;
#ifdef CONFIG_9P_FSCACHE
v9inode->fscache = NULL;
- v9inode->fscache_key = NULL;
spin_lock_init(&v9inode->fscache_lock);
#endif
v9inode->writeback_fid = NULL;
@@ -243,13 +261,13 @@ void v9fs_destroy_inode(struct inode *inode)
}
int v9fs_init_inode(struct v9fs_session_info *v9ses,
- struct inode *inode, int mode)
+ struct inode *inode, int mode, dev_t rdev)
{
int err = 0;
inode_init_owner(inode, NULL, mode);
inode->i_blocks = 0;
- inode->i_rdev = 0;
+ inode->i_rdev = rdev;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_mapping->a_ops = &v9fs_addr_operations;
@@ -336,7 +354,7 @@ error:
*
*/
-struct inode *v9fs_get_inode(struct super_block *sb, int mode)
+struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t rdev)
{
int err;
struct inode *inode;
@@ -349,7 +367,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
return ERR_PTR(-ENOMEM);
}
- err = v9fs_init_inode(v9ses, inode, mode);
+ err = v9fs_init_inode(v9ses, inode, mode, rdev);
if (err) {
iput(inode);
return ERR_PTR(err);
@@ -433,17 +451,62 @@ void v9fs_evict_inode(struct inode *inode)
}
}
+static int v9fs_test_inode(struct inode *inode, void *data)
+{
+ int umode;
+ dev_t rdev;
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ struct p9_wstat *st = (struct p9_wstat *)data;
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+
+ umode = p9mode2unixmode(v9ses, st, &rdev);
+ /* don't match inode of different type */
+ if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
+ return 0;
+
+ /* compare qid details */
+ if (memcmp(&v9inode->qid.version,
+ &st->qid.version, sizeof(v9inode->qid.version)))
+ return 0;
+
+ if (v9inode->qid.type != st->qid.type)
+ return 0;
+ return 1;
+}
+
+static int v9fs_test_new_inode(struct inode *inode, void *data)
+{
+ return 0;
+}
+
+static int v9fs_set_inode(struct inode *inode, void *data)
+{
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ struct p9_wstat *st = (struct p9_wstat *)data;
+
+ memcpy(&v9inode->qid, &st->qid, sizeof(st->qid));
+ return 0;
+}
+
static struct inode *v9fs_qid_iget(struct super_block *sb,
struct p9_qid *qid,
- struct p9_wstat *st)
+ struct p9_wstat *st,
+ int new)
{
+ dev_t rdev;
int retval, umode;
unsigned long i_ino;
struct inode *inode;
struct v9fs_session_info *v9ses = sb->s_fs_info;
+ int (*test)(struct inode *, void *);
+
+ if (new)
+ test = v9fs_test_new_inode;
+ else
+ test = v9fs_test_inode;
i_ino = v9fs_qid2ino(qid);
- inode = iget_locked(sb, i_ino);
+ inode = iget5_locked(sb, i_ino, test, v9fs_set_inode, st);
if (!inode)
return ERR_PTR(-ENOMEM);
if (!(inode->i_state & I_NEW))
@@ -453,14 +516,14 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
* FIXME!! we may need support for stale inodes
* later.
*/
- umode = p9mode2unixmode(v9ses, st->mode);
- retval = v9fs_init_inode(v9ses, inode, umode);
+ inode->i_ino = i_ino;
+ umode = p9mode2unixmode(v9ses, st, &rdev);
+ retval = v9fs_init_inode(v9ses, inode, umode, rdev);
if (retval)
goto error;
v9fs_stat2inode(st, inode, sb);
#ifdef CONFIG_9P_FSCACHE
- v9fs_fscache_set_key(inode, &st->qid);
v9fs_cache_inode_get_cookie(inode);
#endif
unlock_new_inode(inode);
@@ -474,7 +537,7 @@ error:
struct inode *
v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
+ struct super_block *sb, int new)
{
struct p9_wstat *st;
struct inode *inode = NULL;
@@ -483,7 +546,7 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
if (IS_ERR(st))
return ERR_CAST(st);
- inode = v9fs_qid_iget(sb, &st->qid, st);
+ inode = v9fs_qid_iget(sb, &st->qid, st, new);
p9stat_free(st);
kfree(st);
return inode;
@@ -585,19 +648,17 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
}
/* instantiate inode and assign the unopened fid to the dentry */
- inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
goto error;
}
- d_instantiate(dentry, inode);
err = v9fs_fid_add(dentry, fid);
if (err < 0)
goto error;
-
+ d_instantiate(dentry, inode);
return ofid;
-
error:
if (ofid)
p9_client_clunk(ofid);
@@ -738,6 +799,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nameidata)
{
+ struct dentry *res;
struct super_block *sb;
struct v9fs_session_info *v9ses;
struct p9_fid *dfid, *fid;
@@ -769,22 +831,35 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(result);
}
-
- inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+ /*
+ * Make sure we don't use a wrong inode due to parallel
+ * unlink. For cached mode create calls request for new
+ * inode. But with cache disabled, lookup should do this.
+ */
+ if (v9ses->cache)
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+ else
+ inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
result = PTR_ERR(inode);
inode = NULL;
goto error;
}
-
result = v9fs_fid_add(dentry, fid);
if (result < 0)
goto error_iput;
-
inst_out:
- d_add(dentry, inode);
- return NULL;
-
+ /*
+ * If we had a rename on the server and a parallel lookup
+ * for the new name, then make sure we instantiate with
+ * the new name. ie look up for a/b, while on server somebody
+ * moved b under k and client parallely did a lookup for
+ * k/b.
+ */
+ res = d_materialise_unique(dentry, inode);
+ if (!IS_ERR(res))
+ return res;
+ result = PTR_ERR(res);
error_iput:
iput(inode);
error:
@@ -950,7 +1025,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
return PTR_ERR(st);
v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb);
- generic_fillattr(dentry->d_inode, stat);
+ generic_fillattr(dentry->d_inode, stat);
p9stat_free(st);
kfree(st);
@@ -1034,6 +1109,7 @@ void
v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
struct super_block *sb)
{
+ mode_t mode;
char ext[32];
char tag_name[14];
unsigned int i_nlink;
@@ -1069,31 +1145,9 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
inode->i_nlink = i_nlink;
}
}
- inode->i_mode = p9mode2unixmode(v9ses, stat->mode);
- if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) {
- char type = 0;
- int major = -1;
- int minor = -1;
-
- strncpy(ext, stat->extension, sizeof(ext));
- sscanf(ext, "%c %u %u", &type, &major, &minor);
- switch (type) {
- case 'c':
- inode->i_mode &= ~S_IFBLK;
- inode->i_mode |= S_IFCHR;
- break;
- case 'b':
- break;
- default:
- P9_DPRINTK(P9_DEBUG_ERROR,
- "Unknown special type %c %s\n", type,
- stat->extension);
- };
- inode->i_rdev = MKDEV(major, minor);
- init_special_inode(inode, inode->i_mode, inode->i_rdev);
- } else
- inode->i_rdev = 0;
-
+ mode = stat->mode & S_IALLUGO;
+ mode |= inode->i_mode & ~S_IALLUGO;
+ inode->i_mode = mode;
i_size_write(inode, stat->length);
/* not real number of blocks, but 512 byte ones ... */
@@ -1359,6 +1413,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
{
+ int umode;
+ dev_t rdev;
loff_t i_size;
struct p9_wstat *st;
struct v9fs_session_info *v9ses;
@@ -1367,6 +1423,12 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
st = p9_client_stat(fid);
if (IS_ERR(st))
return PTR_ERR(st);
+ /*
+ * Don't update inode if the file type is different
+ */
+ umode = p9mode2unixmode(v9ses, st, &rdev);
+ if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
+ goto out;
spin_lock(&inode->i_lock);
/*
@@ -1378,6 +1440,7 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
if (v9ses->cache)
inode->i_size = i_size;
spin_unlock(&inode->i_lock);
+out:
p9stat_free(st);
kfree(st);
return 0;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 691c78f58bef..c873172ab378 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -86,18 +86,63 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
return dentry;
}
+static int v9fs_test_inode_dotl(struct inode *inode, void *data)
+{
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ struct p9_stat_dotl *st = (struct p9_stat_dotl *)data;
+
+ /* don't match inode of different type */
+ if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT))
+ return 0;
+
+ if (inode->i_generation != st->st_gen)
+ return 0;
+
+ /* compare qid details */
+ if (memcmp(&v9inode->qid.version,
+ &st->qid.version, sizeof(v9inode->qid.version)))
+ return 0;
+
+ if (v9inode->qid.type != st->qid.type)
+ return 0;
+ return 1;
+}
+
+/* Always get a new inode */
+static int v9fs_test_new_inode_dotl(struct inode *inode, void *data)
+{
+ return 0;
+}
+
+static int v9fs_set_inode_dotl(struct inode *inode, void *data)
+{
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ struct p9_stat_dotl *st = (struct p9_stat_dotl *)data;
+
+ memcpy(&v9inode->qid, &st->qid, sizeof(st->qid));
+ inode->i_generation = st->st_gen;
+ return 0;
+}
+
static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
struct p9_qid *qid,
struct p9_fid *fid,
- struct p9_stat_dotl *st)
+ struct p9_stat_dotl *st,
+ int new)
{
int retval;
unsigned long i_ino;
struct inode *inode;
struct v9fs_session_info *v9ses = sb->s_fs_info;
+ int (*test)(struct inode *, void *);
+
+ if (new)
+ test = v9fs_test_new_inode_dotl;
+ else
+ test = v9fs_test_inode_dotl;
i_ino = v9fs_qid2ino(qid);
- inode = iget_locked(sb, i_ino);
+ inode = iget5_locked(sb, i_ino, test, v9fs_set_inode_dotl, st);
if (!inode)
return ERR_PTR(-ENOMEM);
if (!(inode->i_state & I_NEW))
@@ -107,13 +152,14 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
* FIXME!! we may need support for stale inodes
* later.
*/
- retval = v9fs_init_inode(v9ses, inode, st->st_mode);
+ inode->i_ino = i_ino;
+ retval = v9fs_init_inode(v9ses, inode,
+ st->st_mode, new_decode_dev(st->st_rdev));
if (retval)
goto error;
v9fs_stat2inode_dotl(st, inode);
#ifdef CONFIG_9P_FSCACHE
- v9fs_fscache_set_key(inode, &st->qid);
v9fs_cache_inode_get_cookie(inode);
#endif
retval = v9fs_get_acl(inode, fid);
@@ -131,20 +177,72 @@ error:
struct inode *
v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
+ struct super_block *sb, int new)
{
struct p9_stat_dotl *st;
struct inode *inode = NULL;
- st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
+ st = p9_client_getattr_dotl(fid, P9_STATS_BASIC | P9_STATS_GEN);
if (IS_ERR(st))
return ERR_CAST(st);
- inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st);
+ inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st, new);
kfree(st);
return inode;
}
+struct dotl_openflag_map {
+ int open_flag;
+ int dotl_flag;
+};
+
+static int v9fs_mapped_dotl_flags(int flags)
+{
+ int i;
+ int rflags = 0;
+ struct dotl_openflag_map dotl_oflag_map[] = {
+ { O_CREAT, P9_DOTL_CREATE },
+ { O_EXCL, P9_DOTL_EXCL },
+ { O_NOCTTY, P9_DOTL_NOCTTY },
+ { O_TRUNC, P9_DOTL_TRUNC },
+ { O_APPEND, P9_DOTL_APPEND },
+ { O_NONBLOCK, P9_DOTL_NONBLOCK },
+ { O_DSYNC, P9_DOTL_DSYNC },
+ { FASYNC, P9_DOTL_FASYNC },
+ { O_DIRECT, P9_DOTL_DIRECT },
+ { O_LARGEFILE, P9_DOTL_LARGEFILE },
+ { O_DIRECTORY, P9_DOTL_DIRECTORY },
+ { O_NOFOLLOW, P9_DOTL_NOFOLLOW },
+ { O_NOATIME, P9_DOTL_NOATIME },
+ { O_CLOEXEC, P9_DOTL_CLOEXEC },
+ { O_SYNC, P9_DOTL_SYNC},
+ };
+ for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
+ if (flags & dotl_oflag_map[i].open_flag)
+ rflags |= dotl_oflag_map[i].dotl_flag;
+ }
+ return rflags;
+}
+
+/**
+ * v9fs_open_to_dotl_flags- convert Linux specific open flags to
+ * plan 9 open flag.
+ * @flags: flags to convert
+ */
+int v9fs_open_to_dotl_flags(int flags)
+{
+ int rflags = 0;
+
+ /*
+ * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
+ * and P9_DOTL_NOACCESS
+ */
+ rflags |= flags & O_ACCMODE;
+ rflags |= v9fs_mapped_dotl_flags(flags);
+
+ return rflags;
+}
+
/**
* v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
* @dir: directory inode that is being created
@@ -213,7 +311,8 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
"Failed to get acl values in creat %d\n", err);
goto error;
}
- err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
+ err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags),
+ mode, gid, &qid);
if (err < 0) {
P9_DPRINTK(P9_DEBUG_VFS,
"p9_client_open_dotl failed in creat %d\n",
@@ -230,19 +329,19 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
fid = NULL;
goto error;
}
- inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
goto error;
}
- d_instantiate(dentry, inode);
err = v9fs_fid_add(dentry, fid);
if (err < 0)
goto error;
+ d_instantiate(dentry, inode);
/* Now set the ACL based on the default value */
- v9fs_set_create_acl(dentry, dacl, pacl);
+ v9fs_set_create_acl(dentry, &dacl, &pacl);
v9inode = V9FS_I(inode);
mutex_lock(&v9inode->v_mutex);
@@ -283,6 +382,7 @@ error:
err_clunk_old_fid:
if (ofid)
p9_client_clunk(ofid);
+ v9fs_set_create_acl(NULL, &dacl, &pacl);
return err;
}
@@ -350,17 +450,17 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
goto error;
}
- inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
err);
goto error;
}
- d_instantiate(dentry, inode);
err = v9fs_fid_add(dentry, fid);
if (err < 0)
goto error;
+ d_instantiate(dentry, inode);
fid = NULL;
} else {
/*
@@ -368,7 +468,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
* inode with stat. We need to get an inode
* so that we can set the acl with dentry
*/
- inode = v9fs_get_inode(dir->i_sb, mode);
+ inode = v9fs_get_inode(dir->i_sb, mode, 0);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto error;
@@ -376,12 +476,13 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
d_instantiate(dentry, inode);
}
/* Now set the ACL based on the default value */
- v9fs_set_create_acl(dentry, dacl, pacl);
+ v9fs_set_create_acl(dentry, &dacl, &pacl);
inc_nlink(dir);
v9fs_invalidate_inode_attr(dir);
error:
if (fid)
p9_client_clunk(fid);
+ v9fs_set_create_acl(NULL, &dacl, &pacl);
return err;
}
@@ -493,6 +594,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
void
v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
{
+ mode_t mode;
struct v9fs_inode *v9inode = V9FS_I(inode);
if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
@@ -505,11 +607,10 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
inode->i_uid = stat->st_uid;
inode->i_gid = stat->st_gid;
inode->i_nlink = stat->st_nlink;
- inode->i_mode = stat->st_mode;
- inode->i_rdev = new_decode_dev(stat->st_rdev);
- if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode)))
- init_special_inode(inode, inode->i_mode, inode->i_rdev);
+ mode = stat->st_mode & S_IALLUGO;
+ mode |= inode->i_mode & ~S_IALLUGO;
+ inode->i_mode = mode;
i_size_write(inode, stat->st_size);
inode->i_blocks = stat->st_blocks;
@@ -547,7 +648,7 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
inode->i_blocks = stat->st_blocks;
}
if (stat->st_result_mask & P9_STATS_GEN)
- inode->i_generation = stat->st_gen;
+ inode->i_generation = stat->st_gen;
/* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
* because the inode structure does not have fields for them.
@@ -603,21 +704,21 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
}
/* instantiate inode and assign the unopened fid to dentry */
- inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
err);
goto error;
}
- d_instantiate(dentry, inode);
err = v9fs_fid_add(dentry, fid);
if (err < 0)
goto error;
+ d_instantiate(dentry, inode);
fid = NULL;
} else {
/* Not in cached mode. No need to populate inode with stat */
- inode = v9fs_get_inode(dir->i_sb, S_IFLNK);
+ inode = v9fs_get_inode(dir->i_sb, S_IFLNK, 0);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto error;
@@ -756,24 +857,24 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
goto error;
}
- inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
err);
goto error;
}
- d_instantiate(dentry, inode);
err = v9fs_fid_add(dentry, fid);
if (err < 0)
goto error;
+ d_instantiate(dentry, inode);
fid = NULL;
} else {
/*
* Not in cached mode. No need to populate inode with stat.
* socket syscall returns a fd, so we need instantiate
*/
- inode = v9fs_get_inode(dir->i_sb, mode);
+ inode = v9fs_get_inode(dir->i_sb, mode, rdev);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto error;
@@ -781,10 +882,11 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
d_instantiate(dentry, inode);
}
/* Now set the ACL based on the default value */
- v9fs_set_create_acl(dentry, dacl, pacl);
+ v9fs_set_create_acl(dentry, &dacl, &pacl);
error:
if (fid)
p9_client_clunk(fid);
+ v9fs_set_create_acl(NULL, &dacl, &pacl);
return err;
}
@@ -838,6 +940,11 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
if (IS_ERR(st))
return PTR_ERR(st);
+ /*
+ * Don't update inode if the file type is different
+ */
+ if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT))
+ goto out;
spin_lock(&inode->i_lock);
/*
@@ -849,6 +956,7 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
if (v9ses->cache)
inode->i_size = i_size;
spin_unlock(&inode->i_lock);
+out:
kfree(st);
return 0;
}
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index feef6cdc1fd2..c70251d47ed1 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -149,7 +149,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
else
sb->s_d_op = &v9fs_dentry_operations;
- inode = v9fs_get_inode(sb, S_IFDIR | mode);
+ inode = v9fs_get_inode(sb, S_IFDIR | mode, 0);
if (IS_ERR(inode)) {
retval = PTR_ERR(inode);
goto release_sb;
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 54b8c28bebc8..720d885e8dca 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -474,17 +474,22 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
befs_data_stream *data = &befs_ino->i_data.ds;
befs_off_t len = data->size;
- befs_debug(sb, "Follow long symlink");
-
- link = kmalloc(len, GFP_NOFS);
- if (!link) {
- link = ERR_PTR(-ENOMEM);
- } else if (befs_read_lsymlink(sb, data, link, len) != len) {
- kfree(link);
- befs_error(sb, "Failed to read entire long symlink");
+ if (len == 0) {
+ befs_error(sb, "Long symlink with illegal length");
link = ERR_PTR(-EIO);
} else {
- link[len - 1] = '\0';
+ befs_debug(sb, "Follow long symlink");
+
+ link = kmalloc(len, GFP_NOFS);
+ if (!link) {
+ link = ERR_PTR(-ENOMEM);
+ } else if (befs_read_lsymlink(sb, data, link, len) != len) {
+ kfree(link);
+ befs_error(sb, "Failed to read entire long symlink");
+ link = ERR_PTR(-EIO);
+ } else {
+ link[len - 1] = '\0';
+ }
}
} else {
link = befs_ino->i_data.symlink;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 610e8e0b04b8..194cf66bc8f5 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1419,6 +1419,11 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
WARN_ON_ONCE(bdev->bd_holders);
sync_blockdev(bdev);
kill_bdev(bdev);
+ /* ->release can cause the old bdi to disappear,
+ * so must switch it out first
+ */
+ bdev_inode_switch_bdi(bdev->bd_inode,
+ &default_backing_dev_info);
}
if (bdev->bd_contains == bdev) {
if (disk->fops->release)
@@ -1432,8 +1437,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
disk_put_part(bdev->bd_part);
bdev->bd_part = NULL;
bdev->bd_disk = NULL;
- bdev_inode_switch_bdi(bdev->bd_inode,
- &default_backing_dev_info);
if (bdev != bdev->bd_contains)
victim = bdev->bd_contains;
bdev->bd_contains = NULL;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 71cd456fdb60..7e20a65d2d4f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1784,6 +1784,9 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
for (i = 0; i < multi->num_stripes; i++, stripe++) {
+ if (!stripe->dev->can_discard)
+ continue;
+
ret = btrfs_issue_discard(stripe->dev->bdev,
stripe->physical,
stripe->length);
@@ -1791,11 +1794,16 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
discarded_bytes += stripe->length;
else if (ret != -EOPNOTSUPP)
break;
+
+ /*
+ * Just in case we get back EOPNOTSUPP for some reason,
+ * just ignore the return value so we don't screw up
+ * people calling discard_extent.
+ */
+ ret = 0;
}
kfree(multi);
}
- if (discarded_bytes && ret == -EOPNOTSUPP)
- ret = 0;
if (actual_bytes)
*actual_bytes = discarded_bytes;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3601f0aebddf..d42e6bfdd3ab 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4124,7 +4124,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
/* special case for "." */
if (filp->f_pos == 0) {
- over = filldir(dirent, ".", 1, 1, btrfs_ino(inode), DT_DIR);
+ over = filldir(dirent, ".", 1,
+ filp->f_pos, btrfs_ino(inode), DT_DIR);
if (over)
return 0;
filp->f_pos = 1;
@@ -4133,7 +4134,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
if (filp->f_pos == 1) {
u64 pino = parent_ino(filp->f_path.dentry);
over = filldir(dirent, "..", 2,
- 2, pino, DT_DIR);
+ filp->f_pos, pino, DT_DIR);
if (over)
return 0;
filp->f_pos = 2;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 4ce8a9f41d1e..7fa128d32f7a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -799,14 +799,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
struct extent_buffer *eb, int slot,
struct btrfs_key *key)
{
- struct inode *dir;
- int ret;
struct btrfs_inode_ref *ref;
+ struct btrfs_dir_item *di;
+ struct inode *dir;
struct inode *inode;
- char *name;
- int namelen;
unsigned long ref_ptr;
unsigned long ref_end;
+ char *name;
+ int namelen;
+ int ret;
int search_done = 0;
/*
@@ -909,6 +910,25 @@ again:
}
btrfs_release_path(path);
+ /* look for a conflicting sequence number */
+ di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
+ btrfs_inode_ref_index(eb, ref),
+ name, namelen, 0);
+ if (di && !IS_ERR(di)) {
+ ret = drop_one_dir_item(trans, root, path, dir, di);
+ BUG_ON(ret);
+ }
+ btrfs_release_path(path);
+
+ /* look for a conflicing name */
+ di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir),
+ name, namelen, 0);
+ if (di && !IS_ERR(di)) {
+ ret = drop_one_dir_item(trans, root, path, dir, di);
+ BUG_ON(ret);
+ }
+ btrfs_release_path(path);
+
insert:
/* insert our name */
ret = btrfs_add_link(trans, dir, inode, name, namelen, 0,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 19450bc53632..43baaf0c6749 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -500,6 +500,9 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
fs_devices->rw_devices--;
}
+ if (device->can_discard)
+ fs_devices->num_can_discard--;
+
new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
BUG_ON(!new_device);
memcpy(new_device, device, sizeof(*new_device));
@@ -508,6 +511,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
new_device->bdev = NULL;
new_device->writeable = 0;
new_device->in_fs_metadata = 0;
+ new_device->can_discard = 0;
list_replace_rcu(&device->dev_list, &new_device->dev_list);
call_rcu(&device->rcu, free_device);
@@ -547,6 +551,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
fmode_t flags, void *holder)
{
+ struct request_queue *q;
struct block_device *bdev;
struct list_head *head = &fs_devices->devices;
struct btrfs_device *device;
@@ -603,6 +608,12 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
seeding = 0;
}
+ q = bdev_get_queue(bdev);
+ if (blk_queue_discard(q)) {
+ device->can_discard = 1;
+ fs_devices->num_can_discard++;
+ }
+
device->bdev = bdev;
device->in_fs_metadata = 0;
device->mode = flags;
@@ -1542,6 +1553,7 @@ error:
int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
{
+ struct request_queue *q;
struct btrfs_trans_handle *trans;
struct btrfs_device *device;
struct block_device *bdev;
@@ -1611,6 +1623,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
lock_chunks(root);
+ q = bdev_get_queue(bdev);
+ if (blk_queue_discard(q))
+ device->can_discard = 1;
device->writeable = 1;
device->work.func = pending_bios_fn;
generate_random_uuid(device->uuid);
@@ -1646,6 +1661,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
root->fs_info->fs_devices->num_devices++;
root->fs_info->fs_devices->open_devices++;
root->fs_info->fs_devices->rw_devices++;
+ if (device->can_discard)
+ root->fs_info->fs_devices->num_can_discard++;
root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
if (!blk_queue_nonrot(bdev_get_queue(bdev)))
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 7c12d61ae7ae..6d866db4e177 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -48,6 +48,7 @@ struct btrfs_device {
int writeable;
int in_fs_metadata;
int missing;
+ int can_discard;
spinlock_t io_lock;
@@ -104,6 +105,7 @@ struct btrfs_fs_devices {
u64 rw_devices;
u64 missing_devices;
u64 total_rw_bytes;
+ u64 num_can_discard;
struct block_device *latest_bdev;
/* all of the devices in the FS, protected by a mutex
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index bc4b12ca537b..53e7d72177d2 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -566,6 +566,12 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
struct inode *dir = dentry->d_inode;
struct dentry *child;
+ if (!dir) {
+ dput(dentry);
+ dentry = ERR_PTR(-ENOENT);
+ break;
+ }
+
/* skip separators */
while (*s == sep)
s++;
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 1a9fe7f816d1..07132c4e99f3 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -4079,7 +4079,8 @@ int CIFSFindNext(const int xid, struct cifs_tcon *tcon,
T2_FNEXT_RSP_PARMS *parms;
char *response_data;
int rc = 0;
- int bytes_returned, name_len;
+ int bytes_returned;
+ unsigned int name_len;
__u16 params, byte_count;
cFYI(1, "In FindNext");
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index ccc1afa0bf3b..2451627c0158 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1258,7 +1258,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
/* ignore */
} else if (strnicmp(data, "guest", 5) == 0) {
/* ignore */
- } else if (strnicmp(data, "rw", 2) == 0) {
+ } else if (strnicmp(data, "rw", 2) == 0 && strlen(data) == 2) {
/* ignore */
} else if (strnicmp(data, "ro", 2) == 0) {
/* ignore */
@@ -1361,7 +1361,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
vol->server_ino = 1;
} else if (strnicmp(data, "noserverino", 9) == 0) {
vol->server_ino = 0;
- } else if (strnicmp(data, "rwpidforward", 4) == 0) {
+ } else if (strnicmp(data, "rwpidforward", 12) == 0) {
vol->rwpidforward = 1;
} else if (strnicmp(data, "cifsacl", 7) == 0) {
vol->cifs_acl = 1;
@@ -2838,7 +2838,8 @@ cleanup_volume_info_contents(struct smb_vol *volume_info)
kfree(volume_info->username);
kzfree(volume_info->password);
kfree(volume_info->UNC);
- kfree(volume_info->UNCip);
+ if (volume_info->UNCip != volume_info->UNC + 2)
+ kfree(volume_info->UNCip);
kfree(volume_info->domainname);
kfree(volume_info->iocharset);
kfree(volume_info->prepath);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index d8d26f334ca0..16cdd6da227a 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -110,8 +110,8 @@ cifs_bp_rename_retry:
}
rcu_read_unlock();
if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) {
- cERROR(1, "did not end path lookup where expected namelen is %d",
- namelen);
+ cFYI(1, "did not end path lookup where expected. namelen=%d "
+ "dfsplen=%d", namelen, dfsplen);
/* presumably this is only possible if racing with a rename
of one of the parent directories (we can not lock the dentries
above us to prevent this, but retrying should be harmless) */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 9b018c8334fa..a7b2dcd4a53e 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -764,20 +764,10 @@ char *cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
if (full_path == NULL)
return full_path;
- if (dfsplen) {
+ if (dfsplen)
strncpy(full_path, tcon->treeName, dfsplen);
- /* switch slash direction in prepath depending on whether
- * windows or posix style path names
- */
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) {
- int i;
- for (i = 0; i < dfsplen; i++) {
- if (full_path[i] == '\\')
- full_path[i] = '/';
- }
- }
- }
strncpy(full_path + dfsplen, vol->prepath, pplen);
+ convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb));
full_path[dfsplen + pplen] = 0; /* add trailing null */
return full_path;
}
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 147aa22c3c3a..c1b9c4b10739 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -362,6 +362,8 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
mid = AllocMidQEntry(hdr, server);
if (mid == NULL) {
mutex_unlock(&server->srv_mutex);
+ atomic_dec(&server->inFlight);
+ wake_up(&server->request_q);
return -ENOMEM;
}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 9f1bb747d77d..b4a6befb1216 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -175,6 +175,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig,
ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes,
ecryptfs_opt_unlink_sigs, ecryptfs_opt_mount_auth_tok_only,
+ ecryptfs_opt_check_dev_ruid,
ecryptfs_opt_err };
static const match_table_t tokens = {
@@ -191,6 +192,7 @@ static const match_table_t tokens = {
{ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"},
{ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"},
{ecryptfs_opt_mount_auth_tok_only, "ecryptfs_mount_auth_tok_only"},
+ {ecryptfs_opt_check_dev_ruid, "ecryptfs_check_dev_ruid"},
{ecryptfs_opt_err, NULL}
};
@@ -236,6 +238,7 @@ static void ecryptfs_init_mount_crypt_stat(
* ecryptfs_parse_options
* @sb: The ecryptfs super block
* @options: The options passed to the kernel
+ * @check_ruid: set to 1 if device uid should be checked against the ruid
*
* Parse mount options:
* debug=N - ecryptfs_verbosity level for debug output
@@ -251,7 +254,8 @@ static void ecryptfs_init_mount_crypt_stat(
*
* Returns zero on success; non-zero on error
*/
-static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
+static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
+ uid_t *check_ruid)
{
char *p;
int rc = 0;
@@ -276,6 +280,8 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
char *cipher_key_bytes_src;
char *fn_cipher_key_bytes_src;
+ *check_ruid = 0;
+
if (!options) {
rc = -EINVAL;
goto out;
@@ -380,6 +386,9 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
mount_crypt_stat->flags |=
ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY;
break;
+ case ecryptfs_opt_check_dev_ruid:
+ *check_ruid = 1;
+ break;
case ecryptfs_opt_err:
default:
printk(KERN_WARNING
@@ -475,6 +484,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
const char *err = "Getting sb failed";
struct inode *inode;
struct path path;
+ uid_t check_ruid;
int rc;
sbi = kmem_cache_zalloc(ecryptfs_sb_info_cache, GFP_KERNEL);
@@ -483,7 +493,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
goto out;
}
- rc = ecryptfs_parse_options(sbi, raw_data);
+ rc = ecryptfs_parse_options(sbi, raw_data, &check_ruid);
if (rc) {
err = "Error parsing options";
goto out;
@@ -521,6 +531,15 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
"known incompatibilities\n");
goto out_free;
}
+
+ if (check_ruid && path.dentry->d_inode->i_uid != current_uid()) {
+ rc = -EPERM;
+ printk(KERN_ERR "Mount of device (uid: %d) not owned by "
+ "requested user (uid: %d)\n",
+ path.dentry->d_inode->i_uid, current_uid());
+ goto out_free;
+ }
+
ecryptfs_set_superblock_lower(s, path.dentry->d_sb);
s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
s->s_blocksize = path.dentry->d_sb->s_blocksize;
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 85d430963116..3745f7c2b9c2 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -39,15 +39,16 @@
int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
loff_t offset, size_t size)
{
- struct ecryptfs_inode_info *inode_info;
+ struct file *lower_file;
mm_segment_t fs_save;
ssize_t rc;
- inode_info = ecryptfs_inode_to_private(ecryptfs_inode);
- BUG_ON(!inode_info->lower_file);
+ lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file;
+ if (!lower_file)
+ return -EIO;
fs_save = get_fs();
set_fs(get_ds());
- rc = vfs_write(inode_info->lower_file, data, size, &offset);
+ rc = vfs_write(lower_file, data, size, &offset);
set_fs(fs_save);
mark_inode_dirty_sync(ecryptfs_inode);
return rc;
@@ -225,15 +226,16 @@ out:
int ecryptfs_read_lower(char *data, loff_t offset, size_t size,
struct inode *ecryptfs_inode)
{
- struct ecryptfs_inode_info *inode_info =
- ecryptfs_inode_to_private(ecryptfs_inode);
+ struct file *lower_file;
mm_segment_t fs_save;
ssize_t rc;
- BUG_ON(!inode_info->lower_file);
+ lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file;
+ if (!lower_file)
+ return -EIO;
fs_save = get_fs();
set_fs(get_ds());
- rc = vfs_read(inode_info->lower_file, data, size, &offset);
+ rc = vfs_read(lower_file, data, size, &offset);
set_fs(fs_save);
return rc;
}
diff --git a/fs/exec.c b/fs/exec.c
index 6075a1e727ae..044c13ffdc42 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1411,6 +1411,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
printable(bprm->buf[2]) &&
printable(bprm->buf[3]))
break; /* -ENOEXEC */
+ if (try)
+ break; /* -ENOEXEC */
request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
#endif
}
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 34b6d9bfc48a..e5a71111cb32 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2210,9 +2210,11 @@ static int ext3_symlink (struct inode * dir,
/*
* For non-fast symlinks, we just allocate inode and put it on
* orphan list in the first transaction => we need bitmap,
- * group descriptor, sb, inode block, quota blocks.
+ * group descriptor, sb, inode block, quota blocks, and
+ * possibly selinux xattr blocks.
*/
- credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
+ credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
+ EXT3_XATTR_TRANS_BLOCKS;
} else {
/*
* Fast symlink. We have to add entry to directory
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index bb85757689b6..5802fa1dab18 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -289,10 +289,10 @@ static inline int ext4_should_order_data(struct inode *inode)
static inline int ext4_should_writeback_data(struct inode *inode)
{
- if (!S_ISREG(inode->i_mode))
- return 0;
if (EXT4_JOURNAL(inode) == NULL)
return 1;
+ if (!S_ISREG(inode->i_mode))
+ return 0;
if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
return 0;
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 159300429d78..c94774c3276e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -189,6 +189,12 @@ void ext4_evict_inode(struct inode *inode)
int err;
trace_ext4_evict_inode(inode);
+
+ mutex_lock(&inode->i_mutex);
+ ext4_flush_completed_IO(inode);
+ mutex_unlock(&inode->i_mutex);
+ ext4_ioend_wait(inode);
+
if (inode->i_nlink) {
truncate_inode_pages(&inode->i_data, 0);
goto no_delete;
@@ -1849,6 +1855,8 @@ static int ext4_journalled_write_end(struct file *file,
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
+ BUG_ON(!ext4_handle_valid(handle));
+
if (copied < len) {
if (!PageUptodate(page))
copied = 0;
@@ -2569,6 +2577,8 @@ static int __ext4_journalled_writepage(struct page *page,
goto out;
}
+ BUG_ON(!ext4_handle_valid(handle));
+
ret = walk_page_buffers(handle, page_bufs, 0, len, NULL,
do_journal_get_write_access);
@@ -2746,7 +2756,7 @@ static int write_cache_pages_da(struct address_space *mapping,
index = wbc->range_start >> PAGE_CACHE_SHIFT;
end = wbc->range_end >> PAGE_CACHE_SHIFT;
- if (wbc->sync_mode == WB_SYNC_ALL)
+ if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag = PAGECACHE_TAG_TOWRITE;
else
tag = PAGECACHE_TAG_DIRTY;
@@ -2978,7 +2988,7 @@ static int ext4_da_writepages(struct address_space *mapping,
}
retry:
- if (wbc->sync_mode == WB_SYNC_ALL)
+ if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag_pages_for_writeback(mapping, index, end);
while (!ret && wbc->nr_to_write > 0) {
@@ -3640,8 +3650,15 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
goto out;
}
- io_end->flag = EXT4_IO_END_UNWRITTEN;
+ /*
+ * It may be over-defensive here to check EXT4_IO_END_UNWRITTEN now,
+ * but being more careful is always safe for the future change.
+ */
inode = io_end->inode;
+ if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+ io_end->flag |= EXT4_IO_END_UNWRITTEN;
+ atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+ }
/* Add the io_end to per-inode completed io list*/
spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b754b7721f51..458a394f6d3c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2264,9 +2264,11 @@ static int ext4_symlink(struct inode *dir,
/*
* For non-fast symlinks, we just allocate inode and put it on
* orphan list in the first transaction => we need bitmap,
- * group descriptor, sb, inode block, quota blocks.
+ * group descriptor, sb, inode block, quota blocks, and
+ * possibly selinux xattr blocks.
*/
- credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
+ credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
+ EXT4_XATTR_TRANS_BLOCKS;
} else {
/*
* Fast symlink. We have to add entry to directory
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 7bb8f76d470a..97e5e98fd42a 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -338,8 +338,10 @@ submit_and_retry:
if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
(io_end->pages[io_end->num_io_pages-1] != io_page))
goto submit_and_retry;
- if (buffer_uninit(bh))
- io->io_end->flag |= EXT4_IO_END_UNWRITTEN;
+ if (buffer_uninit(bh) && !(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+ io_end->flag |= EXT4_IO_END_UNWRITTEN;
+ atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+ }
io->io_end->size += bh->b_size;
io->io_next_block++;
ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9ea71aa864b3..111ed9d3c546 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -892,7 +892,6 @@ static void ext4_i_callback(struct rcu_head *head)
static void ext4_destroy_inode(struct inode *inode)
{
- ext4_ioend_wait(inode);
if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
ext4_msg(inode->i_sb, KERN_ERR,
"Inode %lu (%p): orphan list check failed!",
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 3c72c9917775..49c8c98477b1 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -36,6 +36,7 @@ struct wb_writeback_work {
long nr_pages;
struct super_block *sb;
enum writeback_sync_modes sync_mode;
+ unsigned int tagged_writepages:1;
unsigned int for_kupdate:1;
unsigned int range_cyclic:1;
unsigned int for_background:1;
@@ -418,6 +419,15 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
spin_lock(&inode->i_lock);
inode->i_state &= ~I_SYNC;
if (!(inode->i_state & I_FREEING)) {
+ /*
+ * Sync livelock prevention. Each inode is tagged and synced in
+ * one shot. If still dirty, it will be redirty_tail()'ed below.
+ * Update the dirty time to prevent enqueue and sync it again.
+ */
+ if ((inode->i_state & I_DIRTY) &&
+ (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
+ inode->dirtied_when = jiffies;
+
if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
/*
* We didn't write back all the pages. nfs_writepages()
@@ -650,6 +660,7 @@ static long wb_writeback(struct bdi_writeback *wb,
{
struct writeback_control wbc = {
.sync_mode = work->sync_mode,
+ .tagged_writepages = work->tagged_writepages,
.older_than_this = NULL,
.for_kupdate = work->for_kupdate,
.for_background = work->for_background,
@@ -657,7 +668,7 @@ static long wb_writeback(struct bdi_writeback *wb,
};
unsigned long oldest_jif;
long wrote = 0;
- long write_chunk;
+ long write_chunk = MAX_WRITEBACK_PAGES;
struct inode *inode;
if (wbc.for_kupdate) {
@@ -683,9 +694,7 @@ static long wb_writeback(struct bdi_writeback *wb,
* (quickly) tag currently dirty pages
* (maybe slowly) sync all tagged pages
*/
- if (wbc.sync_mode == WB_SYNC_NONE)
- write_chunk = MAX_WRITEBACK_PAGES;
- else
+ if (wbc.sync_mode == WB_SYNC_ALL || wbc.tagged_writepages)
write_chunk = LONG_MAX;
wbc.wb_start = jiffies; /* livelock avoidance */
@@ -1188,10 +1197,11 @@ void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
{
DECLARE_COMPLETION_ONSTACK(done);
struct wb_writeback_work work = {
- .sb = sb,
- .sync_mode = WB_SYNC_NONE,
- .done = &done,
- .nr_pages = nr,
+ .sb = sb,
+ .sync_mode = WB_SYNC_NONE,
+ .tagged_writepages = 1,
+ .done = &done,
+ .nr_pages = nr,
};
WARN_ON(!rwsem_is_locked(&sb->s_umount));
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 99e13346a752..fb6fc9553312 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -259,10 +259,14 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
forget->forget_one.nlookup = nlookup;
spin_lock(&fc->lock);
- fc->forget_list_tail->next = forget;
- fc->forget_list_tail = forget;
- wake_up(&fc->waitq);
- kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+ if (fc->connected) {
+ fc->forget_list_tail->next = forget;
+ fc->forget_list_tail = forget;
+ wake_up(&fc->waitq);
+ kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+ } else {
+ kfree(forget);
+ }
spin_unlock(&fc->lock);
}
@@ -1362,6 +1366,10 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
if (outarg.namelen > FUSE_NAME_MAX)
goto err;
+ err = -EINVAL;
+ if (size != sizeof(outarg) + outarg.namelen + 1)
+ goto err;
+
name.name = buf;
name.len = outarg.namelen;
err = fuse_copy_one(cs, buf, outarg.namelen + 1);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index d6857523336d..4e7f64b705d1 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -13,6 +13,7 @@
#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/buffer_head.h>
+#include <linux/blkdev.h>
#include "hfsplus_raw.h"
#define DBG_BNODE_REFS 0x00000001
@@ -110,7 +111,9 @@ struct hfsplus_vh;
struct hfs_btree;
struct hfsplus_sb_info {
+ void *s_vhdr_buf;
struct hfsplus_vh *s_vhdr;
+ void *s_backup_vhdr_buf;
struct hfsplus_vh *s_backup_vhdr;
struct hfs_btree *ext_tree;
struct hfs_btree *cat_tree;
@@ -258,6 +261,15 @@ struct hfsplus_readdir_data {
struct hfsplus_cat_key key;
};
+/*
+ * Find minimum acceptible I/O size for an hfsplus sb.
+ */
+static inline unsigned short hfsplus_min_io_size(struct super_block *sb)
+{
+ return max_t(unsigned short, bdev_logical_block_size(sb->s_bdev),
+ HFSPLUS_SECTOR_SIZE);
+}
+
#define hfs_btree_open hfsplus_btree_open
#define hfs_btree_close hfsplus_btree_close
#define hfs_btree_write hfsplus_btree_write
@@ -436,8 +448,8 @@ int hfsplus_compare_dentry(const struct dentry *parent,
/* wrapper.c */
int hfsplus_read_wrapper(struct super_block *);
int hfs_part_find(struct super_block *, sector_t *, sector_t *);
-int hfsplus_submit_bio(struct block_device *bdev, sector_t sector,
- void *data, int rw);
+int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
+ void *buf, void **data, int rw);
/* time macros */
#define __hfsp_mt2ut(t) (be32_to_cpu(t) - 2082844800U)
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c
index 40ad88c12c64..eb355d81e279 100644
--- a/fs/hfsplus/part_tbl.c
+++ b/fs/hfsplus/part_tbl.c
@@ -88,11 +88,12 @@ static int hfs_parse_old_pmap(struct super_block *sb, struct old_pmap *pm,
return -ENOENT;
}
-static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm,
- sector_t *part_start, sector_t *part_size)
+static int hfs_parse_new_pmap(struct super_block *sb, void *buf,
+ struct new_pmap *pm, sector_t *part_start, sector_t *part_size)
{
struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
int size = be32_to_cpu(pm->pmMapBlkCnt);
+ int buf_size = hfsplus_min_io_size(sb);
int res;
int i = 0;
@@ -107,11 +108,14 @@ static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm,
if (++i >= size)
return -ENOENT;
- res = hfsplus_submit_bio(sb->s_bdev,
- *part_start + HFS_PMAP_BLK + i,
- pm, READ);
- if (res)
- return res;
+ pm = (struct new_pmap *)((u8 *)pm + HFSPLUS_SECTOR_SIZE);
+ if ((u8 *)pm - (u8 *)buf >= buf_size) {
+ res = hfsplus_submit_bio(sb,
+ *part_start + HFS_PMAP_BLK + i,
+ buf, (void **)&pm, READ);
+ if (res)
+ return res;
+ }
} while (pm->pmSig == cpu_to_be16(HFS_NEW_PMAP_MAGIC));
return -ENOENT;
@@ -124,15 +128,15 @@ static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm,
int hfs_part_find(struct super_block *sb,
sector_t *part_start, sector_t *part_size)
{
- void *data;
+ void *buf, *data;
int res;
- data = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL);
- if (!data)
+ buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
+ if (!buf)
return -ENOMEM;
- res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK,
- data, READ);
+ res = hfsplus_submit_bio(sb, *part_start + HFS_PMAP_BLK,
+ buf, &data, READ);
if (res)
goto out;
@@ -141,13 +145,13 @@ int hfs_part_find(struct super_block *sb,
res = hfs_parse_old_pmap(sb, data, part_start, part_size);
break;
case HFS_NEW_PMAP_MAGIC:
- res = hfs_parse_new_pmap(sb, data, part_start, part_size);
+ res = hfs_parse_new_pmap(sb, buf, data, part_start, part_size);
break;
default:
res = -ENOENT;
break;
}
out:
- kfree(data);
+ kfree(buf);
return res;
}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 84a47b709f51..c3a76fd25f2f 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -197,17 +197,17 @@ int hfsplus_sync_fs(struct super_block *sb, int wait)
write_backup = 1;
}
- error2 = hfsplus_submit_bio(sb->s_bdev,
+ error2 = hfsplus_submit_bio(sb,
sbi->part_start + HFSPLUS_VOLHEAD_SECTOR,
- sbi->s_vhdr, WRITE_SYNC);
+ sbi->s_vhdr_buf, NULL, WRITE_SYNC);
if (!error)
error = error2;
if (!write_backup)
goto out;
- error2 = hfsplus_submit_bio(sb->s_bdev,
+ error2 = hfsplus_submit_bio(sb,
sbi->part_start + sbi->sect_count - 2,
- sbi->s_backup_vhdr, WRITE_SYNC);
+ sbi->s_backup_vhdr_buf, NULL, WRITE_SYNC);
if (!error)
error2 = error;
out:
@@ -251,8 +251,8 @@ static void hfsplus_put_super(struct super_block *sb)
hfs_btree_close(sbi->ext_tree);
iput(sbi->alloc_file);
iput(sbi->hidden_dir);
- kfree(sbi->s_vhdr);
- kfree(sbi->s_backup_vhdr);
+ kfree(sbi->s_vhdr_buf);
+ kfree(sbi->s_backup_vhdr_buf);
unload_nls(sbi->nls);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
@@ -508,8 +508,8 @@ out_close_cat_tree:
out_close_ext_tree:
hfs_btree_close(sbi->ext_tree);
out_free_vhdr:
- kfree(sbi->s_vhdr);
- kfree(sbi->s_backup_vhdr);
+ kfree(sbi->s_vhdr_buf);
+ kfree(sbi->s_backup_vhdr_buf);
out_unload_nls:
unload_nls(sbi->nls);
unload_nls(nls);
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 4ac88ff79aa6..7b8112da285a 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -31,25 +31,67 @@ static void hfsplus_end_io_sync(struct bio *bio, int err)
complete(bio->bi_private);
}
-int hfsplus_submit_bio(struct block_device *bdev, sector_t sector,
- void *data, int rw)
+/*
+ * hfsplus_submit_bio - Perfrom block I/O
+ * @sb: super block of volume for I/O
+ * @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes
+ * @buf: buffer for I/O
+ * @data: output pointer for location of requested data
+ * @rw: direction of I/O
+ *
+ * The unit of I/O is hfsplus_min_io_size(sb), which may be bigger than
+ * HFSPLUS_SECTOR_SIZE, and @buf must be sized accordingly. On reads
+ * @data will return a pointer to the start of the requested sector,
+ * which may not be the same location as @buf.
+ *
+ * If @sector is not aligned to the bdev logical block size it will
+ * be rounded down. For writes this means that @buf should contain data
+ * that starts at the rounded-down address. As long as the data was
+ * read using hfsplus_submit_bio() and the same buffer is used things
+ * will work correctly.
+ */
+int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
+ void *buf, void **data, int rw)
{
DECLARE_COMPLETION_ONSTACK(wait);
struct bio *bio;
int ret = 0;
+ unsigned int io_size;
+ loff_t start;
+ int offset;
+
+ /*
+ * Align sector to hardware sector size and find offset. We
+ * assume that io_size is a power of two, which _should_
+ * be true.
+ */
+ io_size = hfsplus_min_io_size(sb);
+ start = (loff_t)sector << HFSPLUS_SECTOR_SHIFT;
+ offset = start & (io_size - 1);
+ sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1);
bio = bio_alloc(GFP_NOIO, 1);
bio->bi_sector = sector;
- bio->bi_bdev = bdev;
+ bio->bi_bdev = sb->s_bdev;
bio->bi_end_io = hfsplus_end_io_sync;
bio->bi_private = &wait;
- /*
- * We always submit one sector at a time, so bio_add_page must not fail.
- */
- if (bio_add_page(bio, virt_to_page(data), HFSPLUS_SECTOR_SIZE,
- offset_in_page(data)) != HFSPLUS_SECTOR_SIZE)
- BUG();
+ if (!(rw & WRITE) && data)
+ *data = (u8 *)buf + offset;
+
+ while (io_size > 0) {
+ unsigned int page_offset = offset_in_page(buf);
+ unsigned int len = min_t(unsigned int, PAGE_SIZE - page_offset,
+ io_size);
+
+ ret = bio_add_page(bio, virt_to_page(buf), len, page_offset);
+ if (ret != len) {
+ ret = -EIO;
+ goto out;
+ }
+ io_size -= len;
+ buf = (u8 *)buf + len;
+ }
submit_bio(rw, bio);
wait_for_completion(&wait);
@@ -57,8 +99,9 @@ int hfsplus_submit_bio(struct block_device *bdev, sector_t sector,
if (!bio_flagged(bio, BIO_UPTODATE))
ret = -EIO;
+out:
bio_put(bio);
- return ret;
+ return ret < 0 ? ret : 0;
}
static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
@@ -147,17 +190,17 @@ int hfsplus_read_wrapper(struct super_block *sb)
}
error = -ENOMEM;
- sbi->s_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL);
- if (!sbi->s_vhdr)
+ sbi->s_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
+ if (!sbi->s_vhdr_buf)
goto out;
- sbi->s_backup_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL);
- if (!sbi->s_backup_vhdr)
+ sbi->s_backup_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
+ if (!sbi->s_backup_vhdr_buf)
goto out_free_vhdr;
reread:
- error = hfsplus_submit_bio(sb->s_bdev,
- part_start + HFSPLUS_VOLHEAD_SECTOR,
- sbi->s_vhdr, READ);
+ error = hfsplus_submit_bio(sb, part_start + HFSPLUS_VOLHEAD_SECTOR,
+ sbi->s_vhdr_buf, (void **)&sbi->s_vhdr,
+ READ);
if (error)
goto out_free_backup_vhdr;
@@ -186,9 +229,9 @@ reread:
goto reread;
}
- error = hfsplus_submit_bio(sb->s_bdev,
- part_start + part_size - 2,
- sbi->s_backup_vhdr, READ);
+ error = hfsplus_submit_bio(sb, part_start + part_size - 2,
+ sbi->s_backup_vhdr_buf,
+ (void **)&sbi->s_backup_vhdr, READ);
if (error)
goto out_free_backup_vhdr;
@@ -232,9 +275,9 @@ reread:
return 0;
out_free_backup_vhdr:
- kfree(sbi->s_backup_vhdr);
+ kfree(sbi->s_backup_vhdr_buf);
out_free_vhdr:
- kfree(sbi->s_vhdr);
+ kfree(sbi->s_vhdr_buf);
out:
return error;
}
diff --git a/fs/namei.c b/fs/namei.c
index 14ab8d3f2f0c..b456c7a2e6b4 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2582,6 +2582,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
if (!dir->i_op->rmdir)
return -EPERM;
+ dget(dentry);
mutex_lock(&dentry->d_inode->i_mutex);
error = -EBUSY;
@@ -2602,6 +2603,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
out:
mutex_unlock(&dentry->d_inode->i_mutex);
+ dput(dentry);
if (!error)
d_delete(dentry);
return error;
@@ -3005,6 +3007,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
if (error)
return error;
+ dget(new_dentry);
if (target)
mutex_lock(&target->i_mutex);
@@ -3025,6 +3028,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
out:
if (target)
mutex_unlock(&target->i_mutex);
+ dput(new_dentry);
if (!error)
if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
d_move(old_dentry,new_dentry);
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index b257383bb565..07df5f1d85e5 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -38,6 +38,7 @@ enum nfs4_callback_opnum {
struct cb_process_state {
__be32 drc_status;
struct nfs_client *clp;
+ int slotid;
};
struct cb_compound_hdr_arg {
@@ -166,7 +167,6 @@ extern unsigned nfs4_callback_layoutrecall(
void *dummy, struct cb_process_state *cps);
extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
-extern void nfs4_cb_take_slot(struct nfs_client *clp);
struct cb_devicenotifyitem {
uint32_t cbd_notify_type;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index d4d1954e9bb9..aaa09e948a9c 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -333,7 +333,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
/* Normal */
if (likely(args->csa_sequenceid == slot->seq_nr + 1)) {
slot->seq_nr++;
- return htonl(NFS4_OK);
+ goto out_ok;
}
/* Replay */
@@ -352,11 +352,14 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
/* Wraparound */
if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) {
slot->seq_nr = 1;
- return htonl(NFS4_OK);
+ goto out_ok;
}
/* Misordered request */
return htonl(NFS4ERR_SEQ_MISORDERED);
+out_ok:
+ tbl->highest_used_slotid = args->csa_slotid;
+ return htonl(NFS4_OK);
}
/*
@@ -418,26 +421,37 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
struct cb_sequenceres *res,
struct cb_process_state *cps)
{
+ struct nfs4_slot_table *tbl;
struct nfs_client *clp;
int i;
__be32 status = htonl(NFS4ERR_BADSESSION);
- cps->clp = NULL;
-
clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid);
if (clp == NULL)
goto out;
+ tbl = &clp->cl_session->bc_slot_table;
+
+ spin_lock(&tbl->slot_tbl_lock);
/* state manager is resetting the session */
if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) {
- status = NFS4ERR_DELAY;
+ spin_unlock(&tbl->slot_tbl_lock);
+ status = htonl(NFS4ERR_DELAY);
+ /* Return NFS4ERR_BADSESSION if we're draining the session
+ * in order to reset it.
+ */
+ if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
+ status = htonl(NFS4ERR_BADSESSION);
goto out;
}
status = validate_seqid(&clp->cl_session->bc_slot_table, args);
+ spin_unlock(&tbl->slot_tbl_lock);
if (status)
goto out;
+ cps->slotid = args->csa_slotid;
+
/*
* Check for pending referring calls. If a match is found, a
* related callback was received before the response to the original
@@ -454,7 +468,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
res->csr_slotid = args->csa_slotid;
res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
- nfs4_cb_take_slot(clp);
out:
cps->clp = clp; /* put in nfs4_callback_compound */
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index c6c86a77e043..918ad647afea 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -754,26 +754,15 @@ static void nfs4_callback_free_slot(struct nfs4_session *session)
* Let the state manager know callback processing done.
* A single slot, so highest used slotid is either 0 or -1
*/
- tbl->highest_used_slotid--;
+ tbl->highest_used_slotid = -1;
nfs4_check_drain_bc_complete(session);
spin_unlock(&tbl->slot_tbl_lock);
}
-static void nfs4_cb_free_slot(struct nfs_client *clp)
+static void nfs4_cb_free_slot(struct cb_process_state *cps)
{
- if (clp && clp->cl_session)
- nfs4_callback_free_slot(clp->cl_session);
-}
-
-/* A single slot, so highest used slotid is either 0 or -1 */
-void nfs4_cb_take_slot(struct nfs_client *clp)
-{
- struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table;
-
- spin_lock(&tbl->slot_tbl_lock);
- tbl->highest_used_slotid++;
- BUG_ON(tbl->highest_used_slotid != 0);
- spin_unlock(&tbl->slot_tbl_lock);
+ if (cps->slotid != -1)
+ nfs4_callback_free_slot(cps->clp->cl_session);
}
#else /* CONFIG_NFS_V4_1 */
@@ -784,7 +773,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
}
-static void nfs4_cb_free_slot(struct nfs_client *clp)
+static void nfs4_cb_free_slot(struct cb_process_state *cps)
{
}
#endif /* CONFIG_NFS_V4_1 */
@@ -866,6 +855,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
struct cb_process_state cps = {
.drc_status = 0,
.clp = NULL,
+ .slotid = -1,
};
unsigned int nops = 0;
@@ -906,7 +896,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
*hdr_res.status = status;
*hdr_res.nops = htonl(nops);
- nfs4_cb_free_slot(cps.clp);
+ nfs4_cb_free_slot(&cps);
nfs_put_client(cps.clp);
dprintk("%s: done, status = %u\n", __func__, ntohl(status));
return rpc_success;
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 8ff2ea3f10ef..1d1dc1ee3943 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -479,7 +479,6 @@ static int _io_check(struct objio_state *ios, bool is_write)
for (i = 0; i < ios->numdevs; i++) {
struct osd_sense_info osi;
struct osd_request *or = ios->per_dev[i].or;
- unsigned dev;
int ret;
if (!or)
@@ -500,9 +499,8 @@ static int _io_check(struct objio_state *ios, bool is_write)
continue; /* we recovered */
}
- dev = ios->per_dev[i].dev;
- objlayout_io_set_result(&ios->ol_state, dev,
- &ios->layout->comps[dev].oc_object_id,
+ objlayout_io_set_result(&ios->ol_state, i,
+ &ios->layout->comps[i].oc_object_id,
osd_pri_2_pnfs_err(osi.osd_err_pri),
ios->per_dev[i].offset,
ios->per_dev[i].length,
@@ -589,22 +587,19 @@ static void _calc_stripe_info(struct objio_state *ios, u64 file_offset,
}
static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg,
- unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len,
+ unsigned pgbase, struct _objio_per_comp *per_dev, int len,
gfp_t gfp_flags)
{
unsigned pg = *cur_pg;
+ int cur_len = len;
struct request_queue *q =
osd_request_queue(_io_od(ios, per_dev->dev));
- per_dev->length += cur_len;
-
if (per_dev->bio == NULL) {
- unsigned stripes = ios->layout->num_comps /
- ios->layout->mirrors_p1;
- unsigned pages_in_stripe = stripes *
+ unsigned pages_in_stripe = ios->layout->group_width *
(ios->layout->stripe_unit / PAGE_SIZE);
unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) /
- stripes;
+ ios->layout->group_width;
if (BIO_MAX_PAGES_KMALLOC < bio_size)
bio_size = BIO_MAX_PAGES_KMALLOC;
@@ -632,6 +627,7 @@ static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg,
}
BUG_ON(cur_len);
+ per_dev->length += len;
*cur_pg = pg;
return 0;
}
@@ -650,7 +646,7 @@ static int _prepare_one_group(struct objio_state *ios, u64 length,
int ret = 0;
while (length) {
- struct _objio_per_comp *per_dev = &ios->per_dev[dev];
+ struct _objio_per_comp *per_dev = &ios->per_dev[dev - first_dev];
unsigned cur_len, page_off = 0;
if (!per_dev->length) {
@@ -670,8 +666,8 @@ static int _prepare_one_group(struct objio_state *ios, u64 length,
cur_len = stripe_unit;
}
- if (max_comp < dev)
- max_comp = dev;
+ if (max_comp < dev - first_dev)
+ max_comp = dev - first_dev;
} else {
cur_len = stripe_unit;
}
@@ -806,7 +802,7 @@ static int _read_mirrors(struct objio_state *ios, unsigned cur_comp)
struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
unsigned dev = per_dev->dev;
struct pnfs_osd_object_cred *cred =
- &ios->layout->comps[dev];
+ &ios->layout->comps[cur_comp];
struct osd_obj_id obj = {
.partition = cred->oc_object_id.oid_partition_id,
.id = cred->oc_object_id.oid_object_id,
@@ -904,7 +900,7 @@ static int _write_mirrors(struct objio_state *ios, unsigned cur_comp)
for (; cur_comp < last_comp; ++cur_comp, ++dev) {
struct osd_request *or = NULL;
struct pnfs_osd_object_cred *cred =
- &ios->layout->comps[dev];
+ &ios->layout->comps[cur_comp];
struct osd_obj_id obj = {
.partition = cred->oc_object_id.oid_partition_id,
.id = cred->oc_object_id.oid_object_id,
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
index 16fc758e9123..b3918f7ac34d 100644
--- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
@@ -170,6 +170,9 @@ int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout,
p = _osd_xdr_decode_data_map(p, &layout->olo_map);
layout->olo_comps_index = be32_to_cpup(p++);
layout->olo_num_comps = be32_to_cpup(p++);
+ dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__,
+ layout->olo_comps_index, layout->olo_num_comps);
+
iter->total_comps = layout->olo_num_comps;
return 0;
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 25b6a887adb9..5afaa58a8630 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -877,30 +877,54 @@ struct numa_maps_private {
struct numa_maps md;
};
-static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty)
+static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
+ unsigned long nr_pages)
{
int count = page_mapcount(page);
- md->pages++;
+ md->pages += nr_pages;
if (pte_dirty || PageDirty(page))
- md->dirty++;
+ md->dirty += nr_pages;
if (PageSwapCache(page))
- md->swapcache++;
+ md->swapcache += nr_pages;
if (PageActive(page) || PageUnevictable(page))
- md->active++;
+ md->active += nr_pages;
if (PageWriteback(page))
- md->writeback++;
+ md->writeback += nr_pages;
if (PageAnon(page))
- md->anon++;
+ md->anon += nr_pages;
if (count > md->mapcount_max)
md->mapcount_max = count;
- md->node[page_to_nid(page)]++;
+ md->node[page_to_nid(page)] += nr_pages;
+}
+
+static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ struct page *page;
+ int nid;
+
+ if (!pte_present(pte))
+ return NULL;
+
+ page = vm_normal_page(vma, addr, pte);
+ if (!page)
+ return NULL;
+
+ if (PageReserved(page))
+ return NULL;
+
+ nid = page_to_nid(page);
+ if (!node_isset(nid, node_states[N_HIGH_MEMORY]))
+ return NULL;
+
+ return page;
}
static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
@@ -912,26 +936,32 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
pte_t *pte;
md = walk->private;
- orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
- do {
- struct page *page;
- int nid;
+ spin_lock(&walk->mm->page_table_lock);
+ if (pmd_trans_huge(*pmd)) {
+ if (pmd_trans_splitting(*pmd)) {
+ spin_unlock(&walk->mm->page_table_lock);
+ wait_split_huge_page(md->vma->anon_vma, pmd);
+ } else {
+ pte_t huge_pte = *(pte_t *)pmd;
+ struct page *page;
- if (!pte_present(*pte))
- continue;
+ page = can_gather_numa_stats(huge_pte, md->vma, addr);
+ if (page)
+ gather_stats(page, md, pte_dirty(huge_pte),
+ HPAGE_PMD_SIZE/PAGE_SIZE);
+ spin_unlock(&walk->mm->page_table_lock);
+ return 0;
+ }
+ } else {
+ spin_unlock(&walk->mm->page_table_lock);
+ }
- page = vm_normal_page(md->vma, addr, *pte);
+ orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ do {
+ struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
if (!page)
continue;
-
- if (PageReserved(page))
- continue;
-
- nid = page_to_nid(page);
- if (!node_isset(nid, node_states[N_HIGH_MEMORY]))
- continue;
-
- gather_stats(page, md, pte_dirty(*pte));
+ gather_stats(page, md, pte_dirty(*pte), 1);
} while (pte++, addr += PAGE_SIZE, addr != end);
pte_unmap_unlock(orig_pte, ptl);
@@ -952,7 +982,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
return 0;
md = walk->private;
- gather_stats(page, md, pte_dirty(*pte));
+ gather_stats(page, md, pte_dirty(*pte), 1);
return 0;
}
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 8633521b3b2e..87315168538d 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -70,6 +70,8 @@
#include <linux/ctype.h>
#include <linux/writeback.h>
#include <linux/capability.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
#include <linux/list_sort.h>
#include <asm/page.h>
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index a1a881e68a9a..347cae965e85 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1412,37 +1412,35 @@ xfs_fs_fill_super(
sb->s_time_gran = 1;
set_posix_acl_flag(sb);
- error = xfs_syncd_init(mp);
- if (error)
- goto out_filestream_unmount;
-
xfs_inode_shrinker_register(mp);
error = xfs_mountfs(mp);
if (error)
- goto out_syncd_stop;
+ goto out_filestream_unmount;
+
+ error = xfs_syncd_init(mp);
+ if (error)
+ goto out_unmount;
root = igrab(VFS_I(mp->m_rootip));
if (!root) {
error = ENOENT;
- goto fail_unmount;
+ goto out_syncd_stop;
}
if (is_bad_inode(root)) {
error = EINVAL;
- goto fail_vnrele;
+ goto out_syncd_stop;
}
sb->s_root = d_alloc_root(root);
if (!sb->s_root) {
error = ENOMEM;
- goto fail_vnrele;
+ goto out_iput;
}
return 0;
- out_syncd_stop:
- xfs_inode_shrinker_unregister(mp);
- xfs_syncd_stop(mp);
out_filestream_unmount:
+ xfs_inode_shrinker_unregister(mp);
xfs_filestream_unmount(mp);
out_free_sb:
xfs_freesb(mp);
@@ -1456,17 +1454,12 @@ xfs_fs_fill_super(
out:
return -error;
- fail_vnrele:
- if (sb->s_root) {
- dput(sb->s_root);
- sb->s_root = NULL;
- } else {
- iput(root);
- }
-
- fail_unmount:
- xfs_inode_shrinker_unregister(mp);
+ out_iput:
+ iput(root);
+ out_syncd_stop:
xfs_syncd_stop(mp);
+ out_unmount:
+ xfs_inode_shrinker_unregister(mp);
/*
* Blow away any referenced inode in the filestreams cache.
@@ -1667,24 +1660,13 @@ xfs_init_workqueues(void)
*/
xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
if (!xfs_syncd_wq)
- goto out;
-
- xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
- if (!xfs_ail_wq)
- goto out_destroy_syncd;
-
+ return -ENOMEM;
return 0;
-
-out_destroy_syncd:
- destroy_workqueue(xfs_syncd_wq);
-out:
- return -ENOMEM;
}
STATIC void
xfs_destroy_workqueues(void)
{
- destroy_workqueue(xfs_ail_wq);
destroy_workqueue(xfs_syncd_wq);
}
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 9e0e2fa3f2c8..8126fc2ea631 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -183,13 +183,14 @@ xfs_qm_dqunpin_wait(
* search the buffer cache can be a time consuming thing, and AIL lock is a
* spinlock.
*/
-STATIC void
+STATIC bool
xfs_qm_dquot_logitem_pushbuf(
struct xfs_log_item *lip)
{
struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
struct xfs_dquot *dqp = qlip->qli_dquot;
struct xfs_buf *bp;
+ bool ret = true;
ASSERT(XFS_DQ_IS_LOCKED(dqp));
@@ -201,17 +202,20 @@ xfs_qm_dquot_logitem_pushbuf(
if (completion_done(&dqp->q_flush) ||
!(lip->li_flags & XFS_LI_IN_AIL)) {
xfs_dqunlock(dqp);
- return;
+ return true;
}
bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
xfs_dqunlock(dqp);
if (!bp)
- return;
+ return true;
if (XFS_BUF_ISDELAYWRITE(bp))
xfs_buf_delwri_promote(bp);
+ if (XFS_BUF_ISPINNED(bp))
+ ret = false;
xfs_buf_relse(bp);
+ return ret;
}
/*
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 7b7e005e3dcc..a7342e840d77 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -632,7 +632,7 @@ xfs_buf_item_push(
* the xfsbufd to get this buffer written. We have to unlock the buffer
* to allow the xfsbufd to write it, too.
*/
-STATIC void
+STATIC bool
xfs_buf_item_pushbuf(
struct xfs_log_item *lip)
{
@@ -646,6 +646,7 @@ xfs_buf_item_pushbuf(
xfs_buf_delwri_promote(bp);
xfs_buf_relse(bp);
+ return true;
}
STATIC void
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index b1e88d56069c..391044c62d5e 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -713,13 +713,14 @@ xfs_inode_item_committed(
* marked delayed write. If that's the case, we'll promote it and that will
* allow the caller to write the buffer by triggering the xfsbufd to run.
*/
-STATIC void
+STATIC bool
xfs_inode_item_pushbuf(
struct xfs_log_item *lip)
{
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
struct xfs_inode *ip = iip->ili_inode;
struct xfs_buf *bp;
+ bool ret = true;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
@@ -730,7 +731,7 @@ xfs_inode_item_pushbuf(
if (completion_done(&ip->i_flush) ||
!(lip->li_flags & XFS_LI_IN_AIL)) {
xfs_iunlock(ip, XFS_ILOCK_SHARED);
- return;
+ return true;
}
bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno,
@@ -738,10 +739,13 @@ xfs_inode_item_pushbuf(
xfs_iunlock(ip, XFS_ILOCK_SHARED);
if (!bp)
- return;
+ return true;
if (XFS_BUF_ISDELAYWRITE(bp))
xfs_buf_delwri_promote(bp);
+ if (XFS_BUF_ISPINNED(bp))
+ ret = false;
xfs_buf_relse(bp);
+ return ret;
}
/*
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index c83f63b33aae..efc147f0e9b6 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1426,6 +1426,7 @@ xfs_trans_committed(
static inline void
xfs_log_item_batch_insert(
struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
struct xfs_log_item **log_items,
int nr_items,
xfs_lsn_t commit_lsn)
@@ -1434,7 +1435,7 @@ xfs_log_item_batch_insert(
spin_lock(&ailp->xa_lock);
/* xfs_trans_ail_update_bulk drops ailp->xa_lock */
- xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn);
+ xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
for (i = 0; i < nr_items; i++)
IOP_UNPIN(log_items[i], 0);
@@ -1452,6 +1453,13 @@ xfs_log_item_batch_insert(
* as an iclog write error even though we haven't started any IO yet. Hence in
* this case all we need to do is IOP_COMMITTED processing, followed by an
* IOP_UNPIN(aborted) call.
+ *
+ * The AIL cursor is used to optimise the insert process. If commit_lsn is not
+ * at the end of the AIL, the insert cursor avoids the need to walk
+ * the AIL to find the insertion point on every xfs_log_item_batch_insert()
+ * call. This saves a lot of needless list walking and is a net win, even
+ * though it slightly increases that amount of AIL lock traffic to set it up
+ * and tear it down.
*/
void
xfs_trans_committed_bulk(
@@ -1463,8 +1471,13 @@ xfs_trans_committed_bulk(
#define LOG_ITEM_BATCH_SIZE 32
struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
struct xfs_log_vec *lv;
+ struct xfs_ail_cursor cur;
int i = 0;
+ spin_lock(&ailp->xa_lock);
+ xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
+ spin_unlock(&ailp->xa_lock);
+
/* unpin all the log items */
for (lv = log_vector; lv; lv = lv->lv_next ) {
struct xfs_log_item *lip = lv->lv_item;
@@ -1493,7 +1506,9 @@ xfs_trans_committed_bulk(
/*
* Not a bulk update option due to unusual item_lsn.
* Push into AIL immediately, rechecking the lsn once
- * we have the ail lock. Then unpin the item.
+ * we have the ail lock. Then unpin the item. This does
+ * not affect the AIL cursor the bulk insert path is
+ * using.
*/
spin_lock(&ailp->xa_lock);
if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
@@ -1507,7 +1522,7 @@ xfs_trans_committed_bulk(
/* Item is a candidate for bulk AIL insert. */
log_items[i++] = lv->lv_item;
if (i >= LOG_ITEM_BATCH_SIZE) {
- xfs_log_item_batch_insert(ailp, log_items,
+ xfs_log_item_batch_insert(ailp, &cur, log_items,
LOG_ITEM_BATCH_SIZE, commit_lsn);
i = 0;
}
@@ -1515,7 +1530,11 @@ xfs_trans_committed_bulk(
/* make sure we insert the remainder! */
if (i)
- xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn);
+ xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
+
+ spin_lock(&ailp->xa_lock);
+ xfs_trans_ail_cursor_done(ailp, &cur);
+ spin_unlock(&ailp->xa_lock);
}
/*
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 06a9759b6352..53597f4db9b5 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -350,7 +350,7 @@ typedef struct xfs_item_ops {
void (*iop_unlock)(xfs_log_item_t *);
xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
void (*iop_push)(xfs_log_item_t *);
- void (*iop_pushbuf)(xfs_log_item_t *);
+ bool (*iop_pushbuf)(xfs_log_item_t *);
void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
} xfs_item_ops_t;
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 5fc2380092c8..a4c281bf7a9b 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -28,8 +28,6 @@
#include "xfs_trans_priv.h"
#include "xfs_error.h"
-struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
-
#ifdef DEBUG
/*
* Check that the list is sorted as it should be.
@@ -272,9 +270,9 @@ xfs_trans_ail_cursor_clear(
}
/*
- * Return the item in the AIL with the current lsn.
- * Return the current tree generation number for use
- * in calls to xfs_trans_next_ail().
+ * Initialise the cursor to the first item in the AIL with the given @lsn.
+ * This searches the list from lowest LSN to highest. Pass a @lsn of zero
+ * to initialise the cursor to the first item in the AIL.
*/
xfs_log_item_t *
xfs_trans_ail_cursor_first(
@@ -300,31 +298,97 @@ out:
}
/*
- * splice the log item list into the AIL at the given LSN.
+ * Initialise the cursor to the last item in the AIL with the given @lsn.
+ * This searches the list from highest LSN to lowest. If there is no item with
+ * the value of @lsn, then it sets the cursor to the last item with an LSN lower
+ * than @lsn.
+ */
+static struct xfs_log_item *
+__xfs_trans_ail_cursor_last(
+ struct xfs_ail *ailp,
+ xfs_lsn_t lsn)
+{
+ xfs_log_item_t *lip;
+
+ list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) {
+ if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
+ return lip;
+ }
+ return NULL;
+}
+
+/*
+ * Initialise the cursor to the last item in the AIL with the given @lsn.
+ * This searches the list from highest LSN to lowest.
+ */
+struct xfs_log_item *
+xfs_trans_ail_cursor_last(
+ struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
+ xfs_lsn_t lsn)
+{
+ xfs_trans_ail_cursor_init(ailp, cur);
+ cur->item = __xfs_trans_ail_cursor_last(ailp, lsn);
+ return cur->item;
+}
+
+/*
+ * splice the log item list into the AIL at the given LSN. We splice to the
+ * tail of the given LSN to maintain insert order for push traversals. The
+ * cursor is optional, allowing repeated updates to the same LSN to avoid
+ * repeated traversals.
*/
static void
xfs_ail_splice(
- struct xfs_ail *ailp,
- struct list_head *list,
- xfs_lsn_t lsn)
+ struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
+ struct list_head *list,
+ xfs_lsn_t lsn)
{
- xfs_log_item_t *next_lip;
+ struct xfs_log_item *lip = cur ? cur->item : NULL;
+ struct xfs_log_item *next_lip;
- /* If the list is empty, just insert the item. */
- if (list_empty(&ailp->xa_ail)) {
- list_splice(list, &ailp->xa_ail);
- return;
+ /*
+ * Get a new cursor if we don't have a placeholder or the existing one
+ * has been invalidated.
+ */
+ if (!lip || (__psint_t)lip & 1) {
+ lip = __xfs_trans_ail_cursor_last(ailp, lsn);
+
+ if (!lip) {
+ /* The list is empty, so just splice and return. */
+ if (cur)
+ cur->item = NULL;
+ list_splice(list, &ailp->xa_ail);
+ return;
+ }
}
- list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
- if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
- break;
+ /*
+ * Our cursor points to the item we want to insert _after_, so we have
+ * to update the cursor to point to the end of the list we are splicing
+ * in so that it points to the correct location for the next splice.
+ * i.e. before the splice
+ *
+ * lsn -> lsn -> lsn + x -> lsn + x ...
+ * ^
+ * | cursor points here
+ *
+ * After the splice we have:
+ *
+ * lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ...
+ * ^ ^
+ * | cursor points here | needs to move here
+ *
+ * So we set the cursor to the last item in the list to be spliced
+ * before we execute the splice, resulting in the cursor pointing to
+ * the correct item after the splice occurs.
+ */
+ if (cur) {
+ next_lip = list_entry(list->prev, struct xfs_log_item, li_ail);
+ cur->item = next_lip;
}
-
- ASSERT(&next_lip->li_ail == &ailp->xa_ail ||
- XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0);
-
- list_splice_init(list, &next_lip->li_ail);
+ list_splice(list, &lip->li_ail);
}
/*
@@ -340,16 +404,10 @@ xfs_ail_delete(
xfs_trans_ail_cursor_clear(ailp, lip);
}
-/*
- * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself
- * to run at a later time if there is more work to do to complete the push.
- */
-STATIC void
-xfs_ail_worker(
- struct work_struct *work)
+static long
+xfsaild_push(
+ struct xfs_ail *ailp)
{
- struct xfs_ail *ailp = container_of(to_delayed_work(work),
- struct xfs_ail, xa_work);
xfs_mount_t *mp = ailp->xa_mount;
struct xfs_ail_cursor *cur = &ailp->xa_cursors;
xfs_log_item_t *lip;
@@ -412,8 +470,13 @@ xfs_ail_worker(
case XFS_ITEM_PUSHBUF:
XFS_STATS_INC(xs_push_ail_pushbuf);
- IOP_PUSHBUF(lip);
- ailp->xa_last_pushed_lsn = lsn;
+
+ if (!IOP_PUSHBUF(lip)) {
+ stuck++;
+ flush_log = 1;
+ } else {
+ ailp->xa_last_pushed_lsn = lsn;
+ }
push_xfsbufd = 1;
break;
@@ -425,7 +488,6 @@ xfs_ail_worker(
case XFS_ITEM_LOCKED:
XFS_STATS_INC(xs_push_ail_locked);
- ailp->xa_last_pushed_lsn = lsn;
stuck++;
break;
@@ -486,20 +548,6 @@ out_done:
/* We're past our target or empty, so idle */
ailp->xa_last_pushed_lsn = 0;
- /*
- * We clear the XFS_AIL_PUSHING_BIT first before checking
- * whether the target has changed. If the target has changed,
- * this pushes the requeue race directly onto the result of the
- * atomic test/set bit, so we are guaranteed that either the
- * the pusher that changed the target or ourselves will requeue
- * the work (but not both).
- */
- clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
- smp_rmb();
- if (XFS_LSN_CMP(ailp->xa_target, target) == 0 ||
- test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
- return;
-
tout = 50;
} else if (XFS_LSN_CMP(lsn, target) >= 0) {
/*
@@ -522,9 +570,30 @@ out_done:
tout = 20;
}
- /* There is more to do, requeue us. */
- queue_delayed_work(xfs_syncd_wq, &ailp->xa_work,
- msecs_to_jiffies(tout));
+ return tout;
+}
+
+static int
+xfsaild(
+ void *data)
+{
+ struct xfs_ail *ailp = data;
+ long tout = 0; /* milliseconds */
+
+ while (!kthread_should_stop()) {
+ if (tout && tout <= 20)
+ __set_current_state(TASK_KILLABLE);
+ else
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(tout ?
+ msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
+
+ try_to_freeze();
+
+ tout = xfsaild_push(ailp);
+ }
+
+ return 0;
}
/*
@@ -559,8 +628,9 @@ xfs_ail_push(
*/
smp_wmb();
xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn);
- if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
- queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0);
+ smp_wmb();
+
+ wake_up_process(ailp->xa_task);
}
/*
@@ -645,6 +715,7 @@ xfs_trans_unlocked_item(
void
xfs_trans_ail_update_bulk(
struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
struct xfs_log_item **log_items,
int nr_items,
xfs_lsn_t lsn) __releases(ailp->xa_lock)
@@ -674,7 +745,7 @@ xfs_trans_ail_update_bulk(
list_add(&lip->li_ail, &tmp);
}
- xfs_ail_splice(ailp, &tmp, lsn);
+ xfs_ail_splice(ailp, cur, &tmp, lsn);
if (!mlip_changed) {
spin_unlock(&ailp->xa_lock);
@@ -794,9 +865,18 @@ xfs_trans_ail_init(
ailp->xa_mount = mp;
INIT_LIST_HEAD(&ailp->xa_ail);
spin_lock_init(&ailp->xa_lock);
- INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker);
+
+ ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
+ ailp->xa_mount->m_fsname);
+ if (IS_ERR(ailp->xa_task))
+ goto out_free_ailp;
+
mp->m_ail = ailp;
return 0;
+
+out_free_ailp:
+ kmem_free(ailp);
+ return ENOMEM;
}
void
@@ -805,6 +885,6 @@ xfs_trans_ail_destroy(
{
struct xfs_ail *ailp = mp->m_ail;
- cancel_delayed_work_sync(&ailp->xa_work);
+ kthread_stop(ailp->xa_task);
kmem_free(ailp);
}
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 6b164e9e9a1f..fe2e3cbc2f95 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -64,24 +64,19 @@ struct xfs_ail_cursor {
*/
struct xfs_ail {
struct xfs_mount *xa_mount;
+ struct task_struct *xa_task;
struct list_head xa_ail;
xfs_lsn_t xa_target;
struct xfs_ail_cursor xa_cursors;
spinlock_t xa_lock;
- struct delayed_work xa_work;
xfs_lsn_t xa_last_pushed_lsn;
- unsigned long xa_flags;
};
-#define XFS_AIL_PUSHING_BIT 0
-
/*
* From xfs_trans_ail.c
*/
-
-extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
-
void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
struct xfs_log_item **log_items, int nr_items,
xfs_lsn_t lsn) __releases(ailp->xa_lock);
static inline void
@@ -90,7 +85,7 @@ xfs_trans_ail_update(
struct xfs_log_item *lip,
xfs_lsn_t lsn) __releases(ailp->xa_lock)
{
- xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn);
+ xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
}
void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp,
@@ -111,10 +106,13 @@ xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp);
void xfs_trans_unlocked_item(struct xfs_ail *,
xfs_log_item_t *);
-struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
+struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
+ struct xfs_ail_cursor *cur,
+ xfs_lsn_t lsn);
+struct xfs_log_item * xfs_trans_ail_cursor_last(struct xfs_ail *ailp,
struct xfs_ail_cursor *cur,
xfs_lsn_t lsn);
-struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
+struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
struct xfs_ail_cursor *cur);
void xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
struct xfs_ail_cursor *cur);