summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig17
-rw-r--r--fs/aio.c2
-rw-r--r--fs/autofs4/autofs_i.h14
-rw-r--r--fs/autofs4/expire.c4
-rw-r--r--fs/autofs4/inode.c88
-rw-r--r--fs/autofs4/root.c74
-rw-r--r--fs/autofs4/symlink.c3
-rw-r--r--fs/btrfs/Kconfig2
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/acl.c4
-rw-r--r--fs/btrfs/btrfs_inode.h2
-rw-r--r--fs/btrfs/compression.c329
-rw-r--r--fs/btrfs/compression.h72
-rw-r--r--fs/btrfs/ctree.c8
-rw-r--r--fs/btrfs/ctree.h48
-rw-r--r--fs/btrfs/disk-io.c412
-rw-r--r--fs/btrfs/disk-io.h1
-rw-r--r--fs/btrfs/extent-tree.c90
-rw-r--r--fs/btrfs/extent_io.c7
-rw-r--r--fs/btrfs/extent_io.h17
-rw-r--r--fs/btrfs/extent_map.c2
-rw-r--r--fs/btrfs/extent_map.h3
-rw-r--r--fs/btrfs/file.c126
-rw-r--r--fs/btrfs/inode.c201
-rw-r--r--fs/btrfs/ioctl.c220
-rw-r--r--fs/btrfs/ioctl.h12
-rw-r--r--fs/btrfs/lzo.c420
-rw-r--r--fs/btrfs/ordered-data.c18
-rw-r--r--fs/btrfs/ordered-data.h8
-rw-r--r--fs/btrfs/super.c281
-rw-r--r--fs/btrfs/transaction.c11
-rw-r--r--fs/btrfs/transaction.h1
-rw-r--r--fs/btrfs/volumes.c626
-rw-r--r--fs/btrfs/volumes.h27
-rw-r--r--fs/btrfs/xattr.c18
-rw-r--r--fs/btrfs/zlib.c369
-rw-r--r--fs/cifs/connect.c2
-rw-r--r--fs/cifs/netmisc.c4
-rw-r--r--fs/compat.c10
-rw-r--r--fs/ecryptfs/crypto.c30
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h2
-rw-r--r--fs/ecryptfs/file.c28
-rw-r--r--fs/ecryptfs/inode.c32
-rw-r--r--fs/ecryptfs/keystore.c26
-rw-r--r--fs/ecryptfs/main.c9
-rw-r--r--fs/ecryptfs/mmap.c35
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/extents.c11
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/file_table.c2
-rw-r--r--fs/gfs2/file.c258
-rw-r--r--fs/gfs2/inode.c72
-rw-r--r--fs/gfs2/inode.h1
-rw-r--r--fs/gfs2/ops_inode.c258
-rw-r--r--fs/gfs2/super.c1
-rw-r--r--fs/hpfs/inode.c2
-rw-r--r--fs/internal.h3
-rw-r--r--fs/ioctl.c10
-rw-r--r--fs/jffs2/build.c5
-rw-r--r--fs/jffs2/jffs2_fs_sb.h2
-rw-r--r--fs/jffs2/xattr.c12
-rw-r--r--fs/namei.c35
-rw-r--r--fs/namespace.c63
-rw-r--r--fs/ocfs2/file.c10
-rw-r--r--fs/open.c4
-rw-r--r--fs/pipe.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c56
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c57
-rw-r--r--fs/xfs/support/debug.c6
69 files changed, 3189 insertions, 1410 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 771f457402d4..9a7921ae4763 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -30,15 +30,6 @@ config FS_MBCACHE
source "fs/reiserfs/Kconfig"
source "fs/jfs/Kconfig"
-config FS_POSIX_ACL
-# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs/nfs4)
-#
-# NOTE: you can implement Posix ACLs without these helpers (XFS does).
-# Never use this symbol for ifdefs.
-#
- bool
- default n
-
source "fs/xfs/Kconfig"
source "fs/gfs2/Kconfig"
source "fs/ocfs2/Kconfig"
@@ -47,6 +38,14 @@ source "fs/nilfs2/Kconfig"
endif # BLOCK
+# Posix ACL utility routines
+#
+# Note: Posix ACLs can be implemented without these helpers. Never use
+# this symbol for ifdefs in core code.
+#
+config FS_POSIX_ACL
+ def_bool n
+
config EXPORTFS
tristate
diff --git a/fs/aio.c b/fs/aio.c
index 5e00f15c54aa..fc557a3be0a9 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -87,7 +87,7 @@ static int __init aio_setup(void)
aio_wq = create_workqueue("aio");
abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
- BUG_ON(!abe_pool);
+ BUG_ON(!aio_wq || !abe_pool);
pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 1f016bfb42d5..54f923792728 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -88,14 +88,6 @@ struct autofs_info {
uid_t uid;
gid_t gid;
-
- mode_t mode;
- size_t size;
-
- void (*free)(struct autofs_info *);
- union {
- const char *symlink;
- } u;
};
#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */
@@ -175,7 +167,7 @@ static inline int autofs4_ispending(struct dentry *dentry)
return 0;
}
-struct inode *autofs4_get_inode(struct super_block *, struct autofs_info *);
+struct inode *autofs4_get_inode(struct super_block *, mode_t);
void autofs4_free_ino(struct autofs_info *);
/* Expiration */
@@ -285,7 +277,8 @@ static inline void managed_dentry_clear_managed(struct dentry *dentry)
/* Initializing function */
int autofs4_fill_super(struct super_block *, void *, int);
-struct autofs_info *autofs4_init_ino(struct autofs_info *, struct autofs_sb_info *sbi, mode_t mode);
+struct autofs_info *autofs4_new_ino(struct autofs_sb_info *);
+void autofs4_clean_ino(struct autofs_info *);
/* Queue management functions */
@@ -345,5 +338,4 @@ static inline void autofs4_del_expiring(struct dentry *dentry)
return;
}
-void autofs4_dentry_release(struct dentry *);
extern void autofs4_kill_sb(struct super_block *);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 3ed79d76c233..f43100b9662b 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -96,7 +96,7 @@ static struct dentry *get_next_positive_dentry(struct dentry *prev,
struct dentry *p, *ret;
if (prev == NULL)
- return dget(prev);
+ return dget(root);
spin_lock(&autofs4_lock);
relock:
@@ -133,7 +133,7 @@ again:
spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED);
/* Negative dentry - try next */
if (!simple_positive(ret)) {
- spin_unlock(&ret->d_lock);
+ spin_unlock(&p->d_lock);
p = ret;
goto again;
}
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 9e1a9dad23e1..180fa2425e49 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -22,65 +22,27 @@
#include "autofs_i.h"
#include <linux/module.h>
-static void ino_lnkfree(struct autofs_info *ino)
+struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi)
{
- if (ino->u.symlink) {
- kfree(ino->u.symlink);
- ino->u.symlink = NULL;
- }
-}
-
-struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
- struct autofs_sb_info *sbi, mode_t mode)
-{
- int reinit = 1;
-
- if (ino == NULL) {
- reinit = 0;
- ino = kmalloc(sizeof(*ino), GFP_KERNEL);
- }
-
- if (ino == NULL)
- return NULL;
-
- if (!reinit) {
- ino->flags = 0;
- ino->dentry = NULL;
- ino->size = 0;
+ struct autofs_info *ino = kzalloc(sizeof(*ino), GFP_KERNEL);
+ if (ino) {
INIT_LIST_HEAD(&ino->active);
- ino->active_count = 0;
INIT_LIST_HEAD(&ino->expiring);
- atomic_set(&ino->count, 0);
+ ino->last_used = jiffies;
+ ino->sbi = sbi;
}
+ return ino;
+}
+void autofs4_clean_ino(struct autofs_info *ino)
+{
ino->uid = 0;
ino->gid = 0;
- ino->mode = mode;
ino->last_used = jiffies;
-
- ino->sbi = sbi;
-
- if (reinit && ino->free)
- (ino->free)(ino);
-
- memset(&ino->u, 0, sizeof(ino->u));
-
- ino->free = NULL;
-
- if (S_ISLNK(mode))
- ino->free = ino_lnkfree;
-
- return ino;
}
void autofs4_free_ino(struct autofs_info *ino)
{
- if (ino->dentry) {
- ino->dentry->d_fsdata = NULL;
- ino->dentry = NULL;
- }
- if (ino->free)
- (ino->free)(ino);
kfree(ino);
}
@@ -136,9 +98,16 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
return 0;
}
+static void autofs4_evict_inode(struct inode *inode)
+{
+ end_writeback(inode);
+ kfree(inode->i_private);
+}
+
static const struct super_operations autofs4_sops = {
.statfs = simple_statfs,
.show_options = autofs4_show_options,
+ .evict_inode = autofs4_evict_inode,
};
enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto,
@@ -228,17 +197,6 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
return (*pipefd < 0);
}
-static struct autofs_info *autofs4_mkroot(struct autofs_sb_info *sbi)
-{
- struct autofs_info *ino;
-
- ino = autofs4_init_ino(NULL, sbi, S_IFDIR | 0755);
- if (!ino)
- return NULL;
-
- return ino;
-}
-
int autofs4_fill_super(struct super_block *s, void *data, int silent)
{
struct inode * root_inode;
@@ -282,10 +240,10 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
/*
* Get the root inode and dentry, but defer checking for errors.
*/
- ino = autofs4_mkroot(sbi);
+ ino = autofs4_new_ino(sbi);
if (!ino)
goto fail_free;
- root_inode = autofs4_get_inode(s, ino);
+ root_inode = autofs4_get_inode(s, S_IFDIR | 0755);
if (!root_inode)
goto fail_ino;
@@ -368,15 +326,14 @@ fail_unlock:
return -EINVAL;
}
-struct inode *autofs4_get_inode(struct super_block *sb,
- struct autofs_info *inf)
+struct inode *autofs4_get_inode(struct super_block *sb, mode_t mode)
{
struct inode *inode = new_inode(sb);
if (inode == NULL)
return NULL;
- inode->i_mode = inf->mode;
+ inode->i_mode = mode;
if (sb->s_root) {
inode->i_uid = sb->s_root->d_inode->i_uid;
inode->i_gid = sb->s_root->d_inode->i_gid;
@@ -384,12 +341,11 @@ struct inode *autofs4_get_inode(struct super_block *sb,
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_ino = get_next_ino();
- if (S_ISDIR(inf->mode)) {
+ if (S_ISDIR(mode)) {
inode->i_nlink = 2;
inode->i_op = &autofs4_dir_inode_operations;
inode->i_fop = &autofs4_dir_operations;
- } else if (S_ISLNK(inf->mode)) {
- inode->i_size = inf->size;
+ } else if (S_ISLNK(mode)) {
inode->i_op = &autofs4_symlink_inode_operations;
}
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 1dba035fc376..014e7aba3b08 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -37,6 +37,7 @@ static int autofs4_dir_open(struct inode *inode, struct file *file);
static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
static struct vfsmount *autofs4_d_automount(struct path *);
static int autofs4_d_manage(struct dentry *, bool, bool);
+static void autofs4_dentry_release(struct dentry *);
const struct file_operations autofs4_root_operations = {
.open = dcache_dir_open,
@@ -138,25 +139,26 @@ out:
return dcache_dir_open(inode, file);
}
-void autofs4_dentry_release(struct dentry *de)
+static void autofs4_dentry_release(struct dentry *de)
{
- struct autofs_info *inf;
+ struct autofs_info *ino = autofs4_dentry_ino(de);
+ struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb);
DPRINTK("releasing %p", de);
- inf = autofs4_dentry_ino(de);
- if (inf) {
- struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb);
- if (sbi) {
- spin_lock(&sbi->lookup_lock);
- if (!list_empty(&inf->active))
- list_del(&inf->active);
- if (!list_empty(&inf->expiring))
- list_del(&inf->expiring);
- spin_unlock(&sbi->lookup_lock);
- }
- autofs4_free_ino(inf);
+ if (!ino)
+ return;
+
+ if (sbi) {
+ spin_lock(&sbi->lookup_lock);
+ if (!list_empty(&ino->active))
+ list_del(&ino->active);
+ if (!list_empty(&ino->expiring))
+ list_del(&ino->expiring);
+ spin_unlock(&sbi->lookup_lock);
}
+
+ autofs4_free_ino(ino);
}
static struct dentry *autofs4_lookup_active(struct dentry *dentry)
@@ -488,7 +490,8 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
sbi = autofs4_sbi(dir->i_sb);
DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
- current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
+ current->pid, task_pgrp_nr(current), sbi->catatonic,
+ autofs4_oz_mode(sbi));
active = autofs4_lookup_active(dentry);
if (active) {
@@ -507,7 +510,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
if (autofs_type_indirect(sbi->type) && IS_ROOT(dentry->d_parent))
__managed_dentry_set_managed(dentry);
- ino = autofs4_init_ino(NULL, sbi, 0555);
+ ino = autofs4_new_ino(sbi);
if (!ino)
return ERR_PTR(-ENOMEM);
@@ -529,6 +532,7 @@ static int autofs4_dir_symlink(struct inode *dir,
struct autofs_info *ino = autofs4_dentry_ino(dentry);
struct autofs_info *p_ino;
struct inode *inode;
+ size_t size = strlen(symname);
char *cp;
DPRINTK("%s <- %.*s", symname,
@@ -537,39 +541,35 @@ static int autofs4_dir_symlink(struct inode *dir,
if (!autofs4_oz_mode(sbi))
return -EACCES;
- ino = autofs4_init_ino(ino, sbi, S_IFLNK | 0555);
- if (!ino)
- return -ENOMEM;
+ BUG_ON(!ino);
+
+ autofs4_clean_ino(ino);
autofs4_del_active(dentry);
- ino->size = strlen(symname);
- cp = kmalloc(ino->size + 1, GFP_KERNEL);
- if (!cp) {
- if (!dentry->d_fsdata)
- kfree(ino);
+ cp = kmalloc(size + 1, GFP_KERNEL);
+ if (!cp)
return -ENOMEM;
- }
strcpy(cp, symname);
- inode = autofs4_get_inode(dir->i_sb, ino);
+ inode = autofs4_get_inode(dir->i_sb, S_IFLNK | 0555);
if (!inode) {
kfree(cp);
if (!dentry->d_fsdata)
kfree(ino);
return -ENOMEM;
}
+ inode->i_private = cp;
+ inode->i_size = size;
d_add(dentry, inode);
- dentry->d_fsdata = ino;
- ino->dentry = dget(dentry);
+ dget(dentry);
atomic_inc(&ino->count);
p_ino = autofs4_dentry_ino(dentry->d_parent);
if (p_ino && dentry->d_parent != dentry)
atomic_inc(&p_ino->count);
- ino->u.symlink = cp;
dir->i_mtime = CURRENT_TIME;
return 0;
@@ -732,25 +732,21 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
DPRINTK("dentry %p, creating %.*s",
dentry, dentry->d_name.len, dentry->d_name.name);
- ino = autofs4_init_ino(ino, sbi, S_IFDIR | 0555);
- if (!ino)
- return -ENOMEM;
+ BUG_ON(!ino);
+
+ autofs4_clean_ino(ino);
autofs4_del_active(dentry);
- inode = autofs4_get_inode(dir->i_sb, ino);
- if (!inode) {
- if (!dentry->d_fsdata)
- kfree(ino);
+ inode = autofs4_get_inode(dir->i_sb, S_IFDIR | 0555);
+ if (!inode)
return -ENOMEM;
- }
d_add(dentry, inode);
if (sbi->version < 5)
autofs_set_leaf_automount_flags(dentry);
- dentry->d_fsdata = ino;
- ino->dentry = dget(dentry);
+ dget(dentry);
atomic_inc(&ino->count);
p_ino = autofs4_dentry_ino(dentry->d_parent);
if (p_ino && dentry->d_parent != dentry)
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c
index b4ea82934d2e..f27c094a1919 100644
--- a/fs/autofs4/symlink.c
+++ b/fs/autofs4/symlink.c
@@ -14,8 +14,7 @@
static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
{
- struct autofs_info *ino = autofs4_dentry_ino(dentry);
- nd_set_link(nd, (char *)ino->u.symlink);
+ nd_set_link(nd, dentry->d_inode->i_private);
return NULL;
}
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 7bb3c020e570..ecb9fd3be143 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -4,6 +4,8 @@ config BTRFS_FS
select LIBCRC32C
select ZLIB_INFLATE
select ZLIB_DEFLATE
+ select LZO_COMPRESS
+ select LZO_DECOMPRESS
help
Btrfs is a new filesystem with extents, writable snapshotting,
support for multiple devices and many more features.
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index a35eb36b32fd..31610ea73aec 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
transaction.o inode.o file.o tree-defrag.o \
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
- export.o tree-log.o acl.o free-space-cache.o zlib.o \
+ export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 6ae2c8cac9d5..15b5ca2a2606 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -60,8 +60,10 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
size = __btrfs_getxattr(inode, name, value, size);
if (size > 0) {
acl = posix_acl_from_xattr(value, size);
- if (IS_ERR(acl))
+ if (IS_ERR(acl)) {
+ kfree(value);
return acl;
+ }
set_cached_acl(inode, type, acl);
}
kfree(value);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 6ad63f17eca0..ccc991c542df 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -157,7 +157,7 @@ struct btrfs_inode {
/*
* always compress this one file
*/
- unsigned force_compress:1;
+ unsigned force_compress:4;
struct inode vfs_inode;
};
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b50bc4bd5c56..f745287fbf2e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -62,6 +62,9 @@ struct compressed_bio {
/* number of bytes on disk */
unsigned long compressed_len;
+ /* the compression algorithm for this bio */
+ int compress_type;
+
/* number of compressed pages in the array */
unsigned long nr_pages;
@@ -173,11 +176,12 @@ static void end_compressed_bio_read(struct bio *bio, int err)
/* ok, we're the last bio for this extent, lets start
* the decompression.
*/
- ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
- cb->start,
- cb->orig_bio->bi_io_vec,
- cb->orig_bio->bi_vcnt,
- cb->compressed_len);
+ ret = btrfs_decompress_biovec(cb->compress_type,
+ cb->compressed_pages,
+ cb->start,
+ cb->orig_bio->bi_io_vec,
+ cb->orig_bio->bi_vcnt,
+ cb->compressed_len);
csum_failed:
if (ret)
cb->errors = 1;
@@ -588,6 +592,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
cb->len = uncompressed_len;
cb->compressed_len = compressed_len;
+ cb->compress_type = extent_compress_type(bio_flags);
cb->orig_bio = bio;
nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
@@ -677,3 +682,317 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
bio_put(comp_bio);
return 0;
}
+
+static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
+static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES];
+static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
+static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
+static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
+
+struct btrfs_compress_op *btrfs_compress_op[] = {
+ &btrfs_zlib_compress,
+ &btrfs_lzo_compress,
+};
+
+int __init btrfs_init_compress(void)
+{
+ int i;
+
+ for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+ INIT_LIST_HEAD(&comp_idle_workspace[i]);
+ spin_lock_init(&comp_workspace_lock[i]);
+ atomic_set(&comp_alloc_workspace[i], 0);
+ init_waitqueue_head(&comp_workspace_wait[i]);
+ }
+ return 0;
+}
+
+/*
+ * this finds an available workspace or allocates a new one
+ * ERR_PTR is returned if things go bad.
+ */
+static struct list_head *find_workspace(int type)
+{
+ struct list_head *workspace;
+ int cpus = num_online_cpus();
+ int idx = type - 1;
+
+ struct list_head *idle_workspace = &comp_idle_workspace[idx];
+ spinlock_t *workspace_lock = &comp_workspace_lock[idx];
+ atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
+ wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
+ int *num_workspace = &comp_num_workspace[idx];
+again:
+ spin_lock(workspace_lock);
+ if (!list_empty(idle_workspace)) {
+ workspace = idle_workspace->next;
+ list_del(workspace);
+ (*num_workspace)--;
+ spin_unlock(workspace_lock);
+ return workspace;
+
+ }
+ if (atomic_read(alloc_workspace) > cpus) {
+ DEFINE_WAIT(wait);
+
+ spin_unlock(workspace_lock);
+ prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
+ if (atomic_read(alloc_workspace) > cpus && !*num_workspace)
+ schedule();
+ finish_wait(workspace_wait, &wait);
+ goto again;
+ }
+ atomic_inc(alloc_workspace);
+ spin_unlock(workspace_lock);
+
+ workspace = btrfs_compress_op[idx]->alloc_workspace();
+ if (IS_ERR(workspace)) {
+ atomic_dec(alloc_workspace);
+ wake_up(workspace_wait);
+ }
+ return workspace;
+}
+
+/*
+ * put a workspace struct back on the list or free it if we have enough
+ * idle ones sitting around
+ */
+static void free_workspace(int type, struct list_head *workspace)
+{
+ int idx = type - 1;
+ struct list_head *idle_workspace = &comp_idle_workspace[idx];
+ spinlock_t *workspace_lock = &comp_workspace_lock[idx];
+ atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
+ wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
+ int *num_workspace = &comp_num_workspace[idx];
+
+ spin_lock(workspace_lock);
+ if (*num_workspace < num_online_cpus()) {
+ list_add_tail(workspace, idle_workspace);
+ (*num_workspace)++;
+ spin_unlock(workspace_lock);
+ goto wake;
+ }
+ spin_unlock(workspace_lock);
+
+ btrfs_compress_op[idx]->free_workspace(workspace);
+ atomic_dec(alloc_workspace);
+wake:
+ if (waitqueue_active(workspace_wait))
+ wake_up(workspace_wait);
+}
+
+/*
+ * cleanup function for module exit
+ */
+static void free_workspaces(void)
+{
+ struct list_head *workspace;
+ int i;
+
+ for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+ while (!list_empty(&comp_idle_workspace[i])) {
+ workspace = comp_idle_workspace[i].next;
+ list_del(workspace);
+ btrfs_compress_op[i]->free_workspace(workspace);
+ atomic_dec(&comp_alloc_workspace[i]);
+ }
+ }
+}
+
+/*
+ * given an address space and start/len, compress the bytes.
+ *
+ * pages are allocated to hold the compressed result and stored
+ * in 'pages'
+ *
+ * out_pages is used to return the number of pages allocated. There
+ * may be pages allocated even if we return an error
+ *
+ * total_in is used to return the number of bytes actually read. It
+ * may be smaller then len if we had to exit early because we
+ * ran out of room in the pages array or because we cross the
+ * max_out threshold.
+ *
+ * total_out is used to return the total number of compressed bytes
+ *
+ * max_out tells us the max number of bytes that we're allowed to
+ * stuff into pages
+ */
+int btrfs_compress_pages(int type, struct address_space *mapping,
+ u64 start, unsigned long len,
+ struct page **pages,
+ unsigned long nr_dest_pages,
+ unsigned long *out_pages,
+ unsigned long *total_in,
+ unsigned long *total_out,
+ unsigned long max_out)
+{
+ struct list_head *workspace;
+ int ret;
+
+ workspace = find_workspace(type);
+ if (IS_ERR(workspace))
+ return -1;
+
+ ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
+ start, len, pages,
+ nr_dest_pages, out_pages,
+ total_in, total_out,
+ max_out);
+ free_workspace(type, workspace);
+ return ret;
+}
+
+/*
+ * pages_in is an array of pages with compressed data.
+ *
+ * disk_start is the starting logical offset of this array in the file
+ *
+ * bvec is a bio_vec of pages from the file that we want to decompress into
+ *
+ * vcnt is the count of pages in the biovec
+ *
+ * srclen is the number of bytes in pages_in
+ *
+ * The basic idea is that we have a bio that was created by readpages.
+ * The pages in the bio are for the uncompressed data, and they may not
+ * be contiguous. They all correspond to the range of bytes covered by
+ * the compressed extent.
+ */
+int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
+ struct bio_vec *bvec, int vcnt, size_t srclen)
+{
+ struct list_head *workspace;
+ int ret;
+
+ workspace = find_workspace(type);
+ if (IS_ERR(workspace))
+ return -ENOMEM;
+
+ ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
+ disk_start,
+ bvec, vcnt, srclen);
+ free_workspace(type, workspace);
+ return ret;
+}
+
+/*
+ * a less complex decompression routine. Our compressed data fits in a
+ * single page, and we want to read a single page out of it.
+ * start_byte tells us the offset into the compressed data we're interested in
+ */
+int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
+ unsigned long start_byte, size_t srclen, size_t destlen)
+{
+ struct list_head *workspace;
+ int ret;
+
+ workspace = find_workspace(type);
+ if (IS_ERR(workspace))
+ return -ENOMEM;
+
+ ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
+ dest_page, start_byte,
+ srclen, destlen);
+
+ free_workspace(type, workspace);
+ return ret;
+}
+
+void __exit btrfs_exit_compress(void)
+{
+ free_workspaces();
+}
+
+/*
+ * Copy uncompressed data from working buffer to pages.
+ *
+ * buf_start is the byte offset we're of the start of our workspace buffer.
+ *
+ * total_out is the last byte of the buffer
+ */
+int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
+ unsigned long total_out, u64 disk_start,
+ struct bio_vec *bvec, int vcnt,
+ unsigned long *page_index,
+ unsigned long *pg_offset)
+{
+ unsigned long buf_offset;
+ unsigned long current_buf_start;
+ unsigned long start_byte;
+ unsigned long working_bytes = total_out - buf_start;
+ unsigned long bytes;
+ char *kaddr;
+ struct page *page_out = bvec[*page_index].bv_page;
+
+ /*
+ * start byte is the first byte of the page we're currently
+ * copying into relative to the start of the compressed data.
+ */
+ start_byte = page_offset(page_out) - disk_start;
+
+ /* we haven't yet hit data corresponding to this page */
+ if (total_out <= start_byte)
+ return 1;
+
+ /*
+ * the start of the data we care about is offset into
+ * the middle of our working buffer
+ */
+ if (total_out > start_byte && buf_start < start_byte) {
+ buf_offset = start_byte - buf_start;
+ working_bytes -= buf_offset;
+ } else {
+ buf_offset = 0;
+ }
+ current_buf_start = buf_start;
+
+ /* copy bytes from the working buffer into the pages */
+ while (working_bytes > 0) {
+ bytes = min(PAGE_CACHE_SIZE - *pg_offset,
+ PAGE_CACHE_SIZE - buf_offset);
+ bytes = min(bytes, working_bytes);
+ kaddr = kmap_atomic(page_out, KM_USER0);
+ memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
+ kunmap_atomic(kaddr, KM_USER0);
+ flush_dcache_page(page_out);
+
+ *pg_offset += bytes;
+ buf_offset += bytes;
+ working_bytes -= bytes;
+ current_buf_start += bytes;
+
+ /* check if we need to pick another page */
+ if (*pg_offset == PAGE_CACHE_SIZE) {
+ (*page_index)++;
+ if (*page_index >= vcnt)
+ return 0;
+
+ page_out = bvec[*page_index].bv_page;
+ *pg_offset = 0;
+ start_byte = page_offset(page_out) - disk_start;
+
+ /*
+ * make sure our new page is covered by this
+ * working buffer
+ */
+ if (total_out <= start_byte)
+ return 1;
+
+ /*
+ * the next page in the biovec might not be adjacent
+ * to the last page, but it might still be found
+ * inside this working buffer. bump our offset pointer
+ */
+ if (total_out > start_byte &&
+ current_buf_start < start_byte) {
+ buf_offset = start_byte - buf_start;
+ working_bytes = total_out - start_byte;
+ current_buf_start = buf_start + buf_offset;
+ }
+ }
+ }
+
+ return 1;
+}
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 421f5b4aa715..51000174b9d7 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -19,24 +19,27 @@
#ifndef __BTRFS_COMPRESSION_
#define __BTRFS_COMPRESSION_
-int btrfs_zlib_decompress(unsigned char *data_in,
- struct page *dest_page,
- unsigned long start_byte,
- size_t srclen, size_t destlen);
-int btrfs_zlib_compress_pages(struct address_space *mapping,
- u64 start, unsigned long len,
- struct page **pages,
- unsigned long nr_dest_pages,
- unsigned long *out_pages,
- unsigned long *total_in,
- unsigned long *total_out,
- unsigned long max_out);
-int btrfs_zlib_decompress_biovec(struct page **pages_in,
- u64 disk_start,
- struct bio_vec *bvec,
- int vcnt,
- size_t srclen);
-void btrfs_zlib_exit(void);
+int btrfs_init_compress(void);
+void btrfs_exit_compress(void);
+
+int btrfs_compress_pages(int type, struct address_space *mapping,
+ u64 start, unsigned long len,
+ struct page **pages,
+ unsigned long nr_dest_pages,
+ unsigned long *out_pages,
+ unsigned long *total_in,
+ unsigned long *total_out,
+ unsigned long max_out);
+int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
+ struct bio_vec *bvec, int vcnt, size_t srclen);
+int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
+ unsigned long start_byte, size_t srclen, size_t destlen);
+int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
+ unsigned long total_out, u64 disk_start,
+ struct bio_vec *bvec, int vcnt,
+ unsigned long *page_index,
+ unsigned long *pg_offset);
+
int btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
@@ -44,4 +47,37 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long nr_pages);
int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
+
+struct btrfs_compress_op {
+ struct list_head *(*alloc_workspace)(void);
+
+ void (*free_workspace)(struct list_head *workspace);
+
+ int (*compress_pages)(struct list_head *workspace,
+ struct address_space *mapping,
+ u64 start, unsigned long len,
+ struct page **pages,
+ unsigned long nr_dest_pages,
+ unsigned long *out_pages,
+ unsigned long *total_in,
+ unsigned long *total_out,
+ unsigned long max_out);
+
+ int (*decompress_biovec)(struct list_head *workspace,
+ struct page **pages_in,
+ u64 disk_start,
+ struct bio_vec *bvec,
+ int vcnt,
+ size_t srclen);
+
+ int (*decompress)(struct list_head *workspace,
+ unsigned char *data_in,
+ struct page *dest_page,
+ unsigned long start_byte,
+ size_t srclen, size_t destlen);
+};
+
+extern struct btrfs_compress_op btrfs_zlib_compress;
+extern struct btrfs_compress_op btrfs_lzo_compress;
+
#endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 9ac171599258..b5baff0dccfe 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -105,6 +105,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
/* this also releases the path */
void btrfs_free_path(struct btrfs_path *p)
{
+ if (!p)
+ return;
btrfs_release_path(NULL, p);
kmem_cache_free(btrfs_path_cachep, p);
}
@@ -2514,6 +2516,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_assert_tree_locked(path->nodes[1]);
right = read_node_slot(root, upper, slot + 1);
+ if (right == NULL)
+ return 1;
+
btrfs_tree_lock(right);
btrfs_set_lock_blocking(right);
@@ -2764,6 +2769,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_assert_tree_locked(path->nodes[1]);
left = read_node_slot(root, path->nodes[1], slot - 1);
+ if (left == NULL)
+ return 1;
+
btrfs_tree_lock(left);
btrfs_set_lock_blocking(left);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b875d445ea81..2c98b3af6052 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -295,6 +295,14 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
#define BTRFS_FSID_SIZE 16
#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1)
+
+/*
+ * File system states
+ */
+
+/* Errors detected */
+#define BTRFS_SUPER_FLAG_ERROR (1ULL << 2)
+
#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33)
@@ -399,13 +407,15 @@ struct btrfs_super_block {
#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
+#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3)
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
#define BTRFS_FEATURE_INCOMPAT_SUPP \
(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
- BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
+ BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
+ BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO)
/*
* A leaf is full of items. offset and size tell us where to find
@@ -552,9 +562,11 @@ struct btrfs_timespec {
} __attribute__ ((__packed__));
enum btrfs_compression_type {
- BTRFS_COMPRESS_NONE = 0,
- BTRFS_COMPRESS_ZLIB = 1,
- BTRFS_COMPRESS_LAST = 2,
+ BTRFS_COMPRESS_NONE = 0,
+ BTRFS_COMPRESS_ZLIB = 1,
+ BTRFS_COMPRESS_LZO = 2,
+ BTRFS_COMPRESS_TYPES = 2,
+ BTRFS_COMPRESS_LAST = 3,
};
struct btrfs_inode_item {
@@ -598,6 +610,8 @@ struct btrfs_dir_item {
u8 type;
} __attribute__ ((__packed__));
+#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0)
+
struct btrfs_root_item {
struct btrfs_inode_item inode;
__le64 generation;
@@ -896,7 +910,8 @@ struct btrfs_fs_info {
*/
u64 last_trans_log_full_commit;
u64 open_ioctl_trans;
- unsigned long mount_opt;
+ unsigned long mount_opt:20;
+ unsigned long compress_type:4;
u64 max_inline;
u64 alloc_start;
struct btrfs_transaction *running_transaction;
@@ -1051,6 +1066,9 @@ struct btrfs_fs_info {
unsigned metadata_ratio;
void *bdev_holder;
+
+ /* filesystem state */
+ u64 fs_state;
};
/*
@@ -1894,6 +1912,11 @@ BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
last_snapshot, 64);
+static inline bool btrfs_root_readonly(struct btrfs_root *root)
+{
+ return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY;
+}
+
/* struct btrfs_super_block */
BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
@@ -2146,6 +2169,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start);
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
+u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
@@ -2189,6 +2213,12 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
int btrfs_set_block_group_rw(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
+u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
+int btrfs_error_unpin_extent_range(struct btrfs_root *root,
+ u64 start, u64 end);
+int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes);
+
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
@@ -2542,6 +2572,14 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
/* super.c */
int btrfs_parse_options(struct btrfs_root *root, char *options);
int btrfs_sync_fs(struct super_block *sb, int wait);
+void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
+ unsigned int line, int errno);
+
+#define btrfs_std_error(fs_info, errno) \
+do { \
+ if ((errno)) \
+ __btrfs_std_error((fs_info), __func__, __LINE__, (errno));\
+} while (0)
/* acl.c */
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 51d2e4de34eb..b531c36455d8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -44,6 +44,20 @@
static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
+static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
+ int read_only);
+static int btrfs_destroy_ordered_operations(struct btrfs_root *root);
+static int btrfs_destroy_ordered_extents(struct btrfs_root *root);
+static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
+ struct btrfs_root *root);
+static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
+static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
+static int btrfs_destroy_marked_extents(struct btrfs_root *root,
+ struct extent_io_tree *dirty_pages,
+ int mark);
+static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
+ struct extent_io_tree *pinned_extents);
+static int btrfs_cleanup_transaction(struct btrfs_root *root);
/*
* end_io_wq structs are used to do processing in task context when an IO is
@@ -353,6 +367,10 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
WARN_ON(len == 0);
eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
+ if (eb == NULL) {
+ WARN_ON(1);
+ goto out;
+ }
ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
btrfs_header_generation(eb));
BUG_ON(ret);
@@ -427,6 +445,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
WARN_ON(len == 0);
eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
+ if (eb == NULL) {
+ ret = -EIO;
+ goto out;
+ }
found_start = btrfs_header_bytenr(eb);
if (found_start != start) {
@@ -1145,6 +1167,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
}
btrfs_free_path(path);
if (ret) {
+ kfree(root);
if (ret > 0)
ret = -ENOENT;
return ERR_PTR(ret);
@@ -1713,8 +1736,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info, BTRFS_ROOT_TREE_OBJECTID);
bh = btrfs_read_dev_super(fs_devices->latest_bdev);
- if (!bh)
+ if (!bh) {
+ err = -EINVAL;
goto fail_iput;
+ }
memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy));
memcpy(&fs_info->super_for_commit, &fs_info->super_copy,
@@ -1727,6 +1752,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
if (!btrfs_super_root(disk_super))
goto fail_iput;
+ /* check FS state, whether FS is broken. */
+ fs_info->fs_state |= btrfs_super_flags(disk_super);
+
+ btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
+
ret = btrfs_parse_options(tree_root, options);
if (ret) {
err = ret;
@@ -1744,10 +1774,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
}
features = btrfs_super_incompat_flags(disk_super);
- if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
- features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
- btrfs_set_super_incompat_flags(disk_super, features);
- }
+ features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
+ if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO)
+ features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
+ btrfs_set_super_incompat_flags(disk_super, features);
features = btrfs_super_compat_ro_flags(disk_super) &
~BTRFS_FEATURE_COMPAT_RO_SUPP;
@@ -1957,7 +1987,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_set_opt(fs_info->mount_opt, SSD);
}
- if (btrfs_super_log_root(disk_super) != 0) {
+ /* do not make disk changes in broken FS */
+ if (btrfs_super_log_root(disk_super) != 0 &&
+ !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
u64 bytenr = btrfs_super_log_root(disk_super);
if (fs_devices->rw_devices == 0) {
@@ -2442,8 +2474,28 @@ int close_ctree(struct btrfs_root *root)
smp_mb();
btrfs_put_block_group_cache(fs_info);
+
+ /*
+ * Here come 2 situations when btrfs is broken to flip readonly:
+ *
+ * 1. when btrfs flips readonly somewhere else before
+ * btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
+ * and btrfs will skip to write sb directly to keep
+ * ERROR state on disk.
+ *
+ * 2. when btrfs flips readonly just in btrfs_commit_super,
+ * and in such case, btrfs cannnot write sb via btrfs_commit_super,
+ * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
+ * btrfs will cleanup all FS resources first and write sb then.
+ */
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
- ret = btrfs_commit_super(root);
+ ret = btrfs_commit_super(root);
+ if (ret)
+ printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
+ }
+
+ if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+ ret = btrfs_error_commit_super(root);
if (ret)
printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
}
@@ -2619,6 +2671,352 @@ out:
return 0;
}
+static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
+ int read_only)
+{
+ if (read_only)
+ return;
+
+ if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+ printk(KERN_WARNING "warning: mount fs with errors, "
+ "running btrfsck is recommended\n");
+}
+
+int btrfs_error_commit_super(struct btrfs_root *root)
+{
+ int ret;
+
+ mutex_lock(&root->fs_info->cleaner_mutex);
+ btrfs_run_delayed_iputs(root);
+ mutex_unlock(&root->fs_info->cleaner_mutex);
+
+ down_write(&root->fs_info->cleanup_work_sem);
+ up_write(&root->fs_info->cleanup_work_sem);
+
+ /* cleanup FS via transaction */
+ btrfs_cleanup_transaction(root);
+
+ ret = write_ctree_super(NULL, root, 0);
+
+ return ret;
+}
+
+static int btrfs_destroy_ordered_operations(struct btrfs_root *root)
+{
+ struct btrfs_inode *btrfs_inode;
+ struct list_head splice;
+
+ INIT_LIST_HEAD(&splice);
+
+ mutex_lock(&root->fs_info->ordered_operations_mutex);
+ spin_lock(&root->fs_info->ordered_extent_lock);
+
+ list_splice_init(&root->fs_info->ordered_operations, &splice);
+ while (!list_empty(&splice)) {
+ btrfs_inode = list_entry(splice.next, struct btrfs_inode,
+ ordered_operations);
+
+ list_del_init(&btrfs_inode->ordered_operations);
+
+ btrfs_invalidate_inodes(btrfs_inode->root);
+ }
+
+ spin_unlock(&root->fs_info->ordered_extent_lock);
+ mutex_unlock(&root->fs_info->ordered_operations_mutex);
+
+ return 0;
+}
+
+static int btrfs_destroy_ordered_extents(struct btrfs_root *root)
+{
+ struct list_head splice;
+ struct btrfs_ordered_extent *ordered;
+ struct inode *inode;
+
+ INIT_LIST_HEAD(&splice);
+
+ spin_lock(&root->fs_info->ordered_extent_lock);
+
+ list_splice_init(&root->fs_info->ordered_extents, &splice);
+ while (!list_empty(&splice)) {
+ ordered = list_entry(splice.next, struct btrfs_ordered_extent,
+ root_extent_list);
+
+ list_del_init(&ordered->root_extent_list);
+ atomic_inc(&ordered->refs);
+
+ /* the inode may be getting freed (in sys_unlink path). */
+ inode = igrab(ordered->inode);
+
+ spin_unlock(&root->fs_info->ordered_extent_lock);
+ if (inode)
+ iput(inode);
+
+ atomic_set(&ordered->refs, 1);
+ btrfs_put_ordered_extent(ordered);
+
+ spin_lock(&root->fs_info->ordered_extent_lock);
+ }
+
+ spin_unlock(&root->fs_info->ordered_extent_lock);
+
+ return 0;
+}
+
+static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
+ struct btrfs_root *root)
+{
+ struct rb_node *node;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_delayed_ref_node *ref;
+ int ret = 0;
+
+ delayed_refs = &trans->delayed_refs;
+
+ spin_lock(&delayed_refs->lock);
+ if (delayed_refs->num_entries == 0) {
+ printk(KERN_INFO "delayed_refs has NO entry\n");
+ return ret;
+ }
+
+ node = rb_first(&delayed_refs->root);
+ while (node) {
+ ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+ node = rb_next(node);
+
+ ref->in_tree = 0;
+ rb_erase(&ref->rb_node, &delayed_refs->root);
+ delayed_refs->num_entries--;
+
+ atomic_set(&ref->refs, 1);
+ if (btrfs_delayed_ref_is_head(ref)) {
+ struct btrfs_delayed_ref_head *head;
+
+ head = btrfs_delayed_node_to_head(ref);
+ mutex_lock(&head->mutex);
+ kfree(head->extent_op);
+ delayed_refs->num_heads--;
+ if (list_empty(&head->cluster))
+ delayed_refs->num_heads_ready--;
+ list_del_init(&head->cluster);
+ mutex_unlock(&head->mutex);
+ }
+
+ spin_unlock(&delayed_refs->lock);
+ btrfs_put_delayed_ref(ref);
+
+ cond_resched();
+ spin_lock(&delayed_refs->lock);
+ }
+
+ spin_unlock(&delayed_refs->lock);
+
+ return ret;
+}
+
+static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
+{
+ struct btrfs_pending_snapshot *snapshot;
+ struct list_head splice;
+
+ INIT_LIST_HEAD(&splice);
+
+ list_splice_init(&t->pending_snapshots, &splice);
+
+ while (!list_empty(&splice)) {
+ snapshot = list_entry(splice.next,
+ struct btrfs_pending_snapshot,
+ list);
+
+ list_del_init(&snapshot->list);
+
+ kfree(snapshot);
+ }
+
+ return 0;
+}
+
+static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
+{
+ struct btrfs_inode *btrfs_inode;
+ struct list_head splice;
+
+ INIT_LIST_HEAD(&splice);
+
+ list_splice_init(&root->fs_info->delalloc_inodes, &splice);
+
+ spin_lock(&root->fs_info->delalloc_lock);
+
+ while (!list_empty(&splice)) {
+ btrfs_inode = list_entry(splice.next, struct btrfs_inode,
+ delalloc_inodes);
+
+ list_del_init(&btrfs_inode->delalloc_inodes);
+
+ btrfs_invalidate_inodes(btrfs_inode->root);
+ }
+
+ spin_unlock(&root->fs_info->delalloc_lock);
+
+ return 0;
+}
+
+static int btrfs_destroy_marked_extents(struct btrfs_root *root,
+ struct extent_io_tree *dirty_pages,
+ int mark)
+{
+ int ret;
+ struct page *page;
+ struct inode *btree_inode = root->fs_info->btree_inode;
+ struct extent_buffer *eb;
+ u64 start = 0;
+ u64 end;
+ u64 offset;
+ unsigned long index;
+
+ while (1) {
+ ret = find_first_extent_bit(dirty_pages, start, &start, &end,
+ mark);
+ if (ret)
+ break;
+
+ clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
+ while (start <= end) {
+ index = start >> PAGE_CACHE_SHIFT;
+ start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
+ page = find_get_page(btree_inode->i_mapping, index);
+ if (!page)
+ continue;
+ offset = page_offset(page);
+
+ spin_lock(&dirty_pages->buffer_lock);
+ eb = radix_tree_lookup(
+ &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
+ offset >> PAGE_CACHE_SHIFT);
+ spin_unlock(&dirty_pages->buffer_lock);
+ if (eb) {
+ ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
+ &eb->bflags);
+ atomic_set(&eb->refs, 1);
+ }
+ if (PageWriteback(page))
+ end_page_writeback(page);
+
+ lock_page(page);
+ if (PageDirty(page)) {
+ clear_page_dirty_for_io(page);
+ spin_lock_irq(&page->mapping->tree_lock);
+ radix_tree_tag_clear(&page->mapping->page_tree,
+ page_index(page),
+ PAGECACHE_TAG_DIRTY);
+ spin_unlock_irq(&page->mapping->tree_lock);
+ }
+
+ page->mapping->a_ops->invalidatepage(page, 0);
+ unlock_page(page);
+ }
+ }
+
+ return ret;
+}
+
+static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
+ struct extent_io_tree *pinned_extents)
+{
+ struct extent_io_tree *unpin;
+ u64 start;
+ u64 end;
+ int ret;
+
+ unpin = pinned_extents;
+ while (1) {
+ ret = find_first_extent_bit(unpin, 0, &start, &end,
+ EXTENT_DIRTY);
+ if (ret)
+ break;
+
+ /* opt_discard */
+ ret = btrfs_error_discard_extent(root, start, end + 1 - start);
+
+ clear_extent_dirty(unpin, start, end, GFP_NOFS);
+ btrfs_error_unpin_extent_range(root, start, end);
+ cond_resched();
+ }
+
+ return 0;
+}
+
+static int btrfs_cleanup_transaction(struct btrfs_root *root)
+{
+ struct btrfs_transaction *t;
+ LIST_HEAD(list);
+
+ WARN_ON(1);
+
+ mutex_lock(&root->fs_info->trans_mutex);
+ mutex_lock(&root->fs_info->transaction_kthread_mutex);
+
+ list_splice_init(&root->fs_info->trans_list, &list);
+ while (!list_empty(&list)) {
+ t = list_entry(list.next, struct btrfs_transaction, list);
+ if (!t)
+ break;
+
+ btrfs_destroy_ordered_operations(root);
+
+ btrfs_destroy_ordered_extents(root);
+
+ btrfs_destroy_delayed_refs(t, root);
+
+ btrfs_block_rsv_release(root,
+ &root->fs_info->trans_block_rsv,
+ t->dirty_pages.dirty_bytes);
+
+ /* FIXME: cleanup wait for commit */
+ t->in_commit = 1;
+ t->blocked = 1;
+ if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
+ wake_up(&root->fs_info->transaction_blocked_wait);
+
+ t->blocked = 0;
+ if (waitqueue_active(&root->fs_info->transaction_wait))
+ wake_up(&root->fs_info->transaction_wait);
+ mutex_unlock(&root->fs_info->trans_mutex);
+
+ mutex_lock(&root->fs_info->trans_mutex);
+ t->commit_done = 1;
+ if (waitqueue_active(&t->commit_wait))
+ wake_up(&t->commit_wait);
+ mutex_unlock(&root->fs_info->trans_mutex);
+
+ mutex_lock(&root->fs_info->trans_mutex);
+
+ btrfs_destroy_pending_snapshots(t);
+
+ btrfs_destroy_delalloc_inodes(root);
+
+ spin_lock(&root->fs_info->new_trans_lock);
+ root->fs_info->running_transaction = NULL;
+ spin_unlock(&root->fs_info->new_trans_lock);
+
+ btrfs_destroy_marked_extents(root, &t->dirty_pages,
+ EXTENT_DIRTY);
+
+ btrfs_destroy_pinned_extent(root,
+ root->fs_info->pinned_extents);
+
+ t->use_count = 0;
+ list_del_init(&t->list);
+ memset(t, 0, sizeof(*t));
+ kmem_cache_free(btrfs_transaction_cachep, t);
+ }
+
+ mutex_unlock(&root->fs_info->transaction_kthread_mutex);
+ mutex_unlock(&root->fs_info->trans_mutex);
+
+ return 0;
+}
+
static struct extent_io_ops btree_extent_io_ops = {
.write_cache_pages_lock_hook = btree_lock_page_hook,
.readpage_end_io_hook = btree_readpage_end_io_hook,
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 88e825a0bf21..07b20dc2fd95 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -52,6 +52,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int max_mirrors);
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
int btrfs_commit_super(struct btrfs_root *root);
+int btrfs_error_commit_super(struct btrfs_root *root);
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize);
struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 227e5815d838..b55269340cec 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3089,7 +3089,7 @@ static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
return btrfs_reduce_alloc_profile(root, flags);
}
-static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
+u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
{
u64 flags;
@@ -3161,8 +3161,12 @@ alloc:
bytes + 2 * 1024 * 1024,
alloc_target, 0);
btrfs_end_transaction(trans, root);
- if (ret < 0)
- return ret;
+ if (ret < 0) {
+ if (ret != -ENOSPC)
+ return ret;
+ else
+ goto commit_trans;
+ }
if (!data_sinfo) {
btrfs_set_inode_space_info(root, inode);
@@ -3173,6 +3177,7 @@ alloc:
spin_unlock(&data_sinfo->lock);
/* commit the current transaction and try again */
+commit_trans:
if (!committed && !root->fs_info->open_ioctl_trans) {
committed = 1;
trans = btrfs_join_transaction(root, 1);
@@ -3721,11 +3726,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
return 0;
}
- WARN_ON(1);
- printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
- block_rsv->size, block_rsv->reserved,
- block_rsv->freed[0], block_rsv->freed[1]);
-
return -ENOSPC;
}
@@ -7970,13 +7970,14 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache)
if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
sinfo->bytes_may_use + sinfo->bytes_readonly +
- cache->reserved_pinned + num_bytes < sinfo->total_bytes) {
+ cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
sinfo->bytes_readonly += num_bytes;
sinfo->bytes_reserved += cache->reserved_pinned;
cache->reserved_pinned = 0;
cache->ro = 1;
ret = 0;
}
+
spin_unlock(&cache->lock);
spin_unlock(&sinfo->lock);
return ret;
@@ -8012,6 +8013,62 @@ out:
return ret;
}
+/*
+ * helper to account the unused space of all the readonly block group in the
+ * list. takes mirrors into account.
+ */
+static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
+{
+ struct btrfs_block_group_cache *block_group;
+ u64 free_bytes = 0;
+ int factor;
+
+ list_for_each_entry(block_group, groups_list, list) {
+ spin_lock(&block_group->lock);
+
+ if (!block_group->ro) {
+ spin_unlock(&block_group->lock);
+ continue;
+ }
+
+ if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10 |
+ BTRFS_BLOCK_GROUP_DUP))
+ factor = 2;
+ else
+ factor = 1;
+
+ free_bytes += (block_group->key.offset -
+ btrfs_block_group_used(&block_group->item)) *
+ factor;
+
+ spin_unlock(&block_group->lock);
+ }
+
+ return free_bytes;
+}
+
+/*
+ * helper to account the unused space of all the readonly block group in the
+ * space_info. takes mirrors into account.
+ */
+u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
+{
+ int i;
+ u64 free_bytes = 0;
+
+ spin_lock(&sinfo->lock);
+
+ for(i = 0; i < BTRFS_NR_RAID_TYPES; i++)
+ if (!list_empty(&sinfo->block_groups[i]))
+ free_bytes += __btrfs_get_ro_block_group_free_space(
+ &sinfo->block_groups[i]);
+
+ spin_unlock(&sinfo->lock);
+
+ return free_bytes;
+}
+
int btrfs_set_block_group_rw(struct btrfs_root *root,
struct btrfs_block_group_cache *cache)
{
@@ -8092,7 +8149,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
mutex_lock(&root->fs_info->chunk_mutex);
list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
u64 min_free = btrfs_block_group_used(&block_group->item);
- u64 dev_offset, max_avail;
+ u64 dev_offset;
/*
* check to make sure we can actually find a chunk with enough
@@ -8100,7 +8157,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
*/
if (device->total_bytes > device->bytes_used + min_free) {
ret = find_free_dev_extent(NULL, device, min_free,
- &dev_offset, &max_avail);
+ &dev_offset, NULL);
if (!ret)
break;
ret = -1;
@@ -8584,3 +8641,14 @@ out:
btrfs_free_path(path);
return ret;
}
+
+int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
+{
+ return unpin_extent_range(root, start, end);
+}
+
+int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes)
+{
+ return btrfs_discard_extent(root, bytenr, num_bytes);
+}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3e86b9f36507..2e993cf1766e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2028,8 +2028,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
BUG_ON(extent_map_end(em) <= cur);
BUG_ON(end < cur);
- if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
+ if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
this_bio_flag = EXTENT_BIO_COMPRESSED;
+ extent_set_compress_type(&this_bio_flag,
+ em->compress_type);
+ }
iosize = min(extent_map_end(em) - cur, end - cur + 1);
cur_end = min(extent_map_end(em) - 1, end);
@@ -3072,6 +3075,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
#endif
eb = kmem_cache_zalloc(extent_buffer_cache, mask);
+ if (eb == NULL)
+ return NULL;
eb->start = start;
eb->len = len;
spin_lock_init(&eb->lock);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 4183c8178f01..7083cfafd061 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -20,8 +20,12 @@
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
-/* flags for bio submission */
+/*
+ * flags for bio submission. The high bits indicate the compression
+ * type for this bio
+ */
#define EXTENT_BIO_COMPRESSED 1
+#define EXTENT_BIO_FLAG_SHIFT 16
/* these are bit numbers for test/set bit */
#define EXTENT_BUFFER_UPTODATE 0
@@ -135,6 +139,17 @@ struct extent_buffer {
wait_queue_head_t lock_wq;
};
+static inline void extent_set_compress_type(unsigned long *bio_flags,
+ int compress_type)
+{
+ *bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT;
+}
+
+static inline int extent_compress_type(unsigned long bio_flags)
+{
+ return bio_flags >> EXTENT_BIO_FLAG_SHIFT;
+}
+
struct extent_map_tree;
static inline struct extent_state *extent_state_next(struct extent_state *state)
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 23cb8da3ff66..b0e1fce12530 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -3,6 +3,7 @@
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/hardirq.h>
+#include "ctree.h"
#include "extent_map.h"
@@ -54,6 +55,7 @@ struct extent_map *alloc_extent_map(gfp_t mask)
return em;
em->in_tree = 0;
em->flags = 0;
+ em->compress_type = BTRFS_COMPRESS_NONE;
atomic_set(&em->refs, 1);
return em;
}
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index ab6d74b6e647..28b44dbd1e35 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -26,7 +26,8 @@ struct extent_map {
unsigned long flags;
struct block_device *bdev;
atomic_t refs;
- int in_tree;
+ unsigned int in_tree:1;
+ unsigned int compress_type:4;
};
struct extent_map_tree {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 66836d85763b..c800d58f3013 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -24,6 +24,7 @@
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
+#include <linux/falloc.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
@@ -224,6 +225,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split->bdev = em->bdev;
split->flags = flags;
+ split->compress_type = em->compress_type;
ret = add_extent_mapping(em_tree, split);
BUG_ON(ret);
free_extent_map(split);
@@ -238,6 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split->len = em->start + em->len - (start + len);
split->bdev = em->bdev;
split->flags = flags;
+ split->compress_type = em->compress_type;
if (compressed) {
split->block_len = em->block_len;
@@ -890,6 +893,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
if (err)
goto out;
+ /*
+ * If BTRFS flips readonly due to some impossible error
+ * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
+ * although we have opened a file as writable, we have
+ * to stop this write operation to ensure FS consistency.
+ */
+ if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+ err = -EROFS;
+ goto out;
+ }
+
file_update_time(file);
BTRFS_I(inode)->sequence++;
@@ -1237,6 +1251,117 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
return 0;
}
+static long btrfs_fallocate(struct file *file, int mode,
+ loff_t offset, loff_t len)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct extent_state *cached_state = NULL;
+ u64 cur_offset;
+ u64 last_byte;
+ u64 alloc_start;
+ u64 alloc_end;
+ u64 alloc_hint = 0;
+ u64 locked_end;
+ u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
+ struct extent_map *em;
+ int ret;
+
+ alloc_start = offset & ~mask;
+ alloc_end = (offset + len + mask) & ~mask;
+
+ /* We only support the FALLOC_FL_KEEP_SIZE mode */
+ if (mode & ~FALLOC_FL_KEEP_SIZE)
+ return -EOPNOTSUPP;
+
+ /*
+ * wait for ordered IO before we have any locks. We'll loop again
+ * below with the locks held.
+ */
+ btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
+
+ mutex_lock(&inode->i_mutex);
+ ret = inode_newsize_ok(inode, alloc_end);
+ if (ret)
+ goto out;
+
+ if (alloc_start > inode->i_size) {
+ ret = btrfs_cont_expand(inode, alloc_start);
+ if (ret)
+ goto out;
+ }
+
+ ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
+ if (ret)
+ goto out;
+
+ locked_end = alloc_end - 1;
+ while (1) {
+ struct btrfs_ordered_extent *ordered;
+
+ /* the extent lock is ordered inside the running
+ * transaction
+ */
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
+ locked_end, 0, &cached_state, GFP_NOFS);
+ ordered = btrfs_lookup_first_ordered_extent(inode,
+ alloc_end - 1);
+ if (ordered &&
+ ordered->file_offset + ordered->len > alloc_start &&
+ ordered->file_offset < alloc_end) {
+ btrfs_put_ordered_extent(ordered);
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+ alloc_start, locked_end,
+ &cached_state, GFP_NOFS);
+ /*
+ * we can't wait on the range with the transaction
+ * running or with the extent lock held
+ */
+ btrfs_wait_ordered_range(inode, alloc_start,
+ alloc_end - alloc_start);
+ } else {
+ if (ordered)
+ btrfs_put_ordered_extent(ordered);
+ break;
+ }
+ }
+
+ cur_offset = alloc_start;
+ while (1) {
+ em = btrfs_get_extent(inode, NULL, 0, cur_offset,
+ alloc_end - cur_offset, 0);
+ BUG_ON(IS_ERR(em) || !em);
+ last_byte = min(extent_map_end(em), alloc_end);
+ last_byte = (last_byte + mask) & ~mask;
+ if (em->block_start == EXTENT_MAP_HOLE ||
+ (cur_offset >= inode->i_size &&
+ !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
+ ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
+ last_byte - cur_offset,
+ 1 << inode->i_blkbits,
+ offset + len,
+ &alloc_hint);
+ if (ret < 0) {
+ free_extent_map(em);
+ break;
+ }
+ }
+ free_extent_map(em);
+
+ cur_offset = last_byte;
+ if (cur_offset >= alloc_end) {
+ ret = 0;
+ break;
+ }
+ }
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
+ &cached_state, GFP_NOFS);
+
+ btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
+out:
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+}
+
const struct file_operations btrfs_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
@@ -1248,6 +1373,7 @@ const struct file_operations btrfs_file_operations = {
.open = generic_file_open,
.release = btrfs_release_file,
.fsync = btrfs_sync_file,
+ .fallocate = btrfs_fallocate,
.unlocked_ioctl = btrfs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = btrfs_ioctl,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a3798a3aa0d2..160b55b3e132 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -122,10 +122,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
size_t cur_size = size;
size_t datasize;
unsigned long offset;
- int use_compress = 0;
+ int compress_type = BTRFS_COMPRESS_NONE;
if (compressed_size && compressed_pages) {
- use_compress = 1;
+ compress_type = root->fs_info->compress_type;
cur_size = compressed_size;
}
@@ -159,7 +159,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
btrfs_set_file_extent_ram_bytes(leaf, ei, size);
ptr = btrfs_file_extent_inline_start(ei);
- if (use_compress) {
+ if (compress_type != BTRFS_COMPRESS_NONE) {
struct page *cpage;
int i = 0;
while (compressed_size > 0) {
@@ -176,7 +176,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
compressed_size -= cur_size;
}
btrfs_set_file_extent_compression(leaf, ei,
- BTRFS_COMPRESS_ZLIB);
+ compress_type);
} else {
page = find_get_page(inode->i_mapping,
start >> PAGE_CACHE_SHIFT);
@@ -263,6 +263,7 @@ struct async_extent {
u64 compressed_size;
struct page **pages;
unsigned long nr_pages;
+ int compress_type;
struct list_head list;
};
@@ -280,7 +281,8 @@ static noinline int add_async_extent(struct async_cow *cow,
u64 start, u64 ram_size,
u64 compressed_size,
struct page **pages,
- unsigned long nr_pages)
+ unsigned long nr_pages,
+ int compress_type)
{
struct async_extent *async_extent;
@@ -290,6 +292,7 @@ static noinline int add_async_extent(struct async_cow *cow,
async_extent->compressed_size = compressed_size;
async_extent->pages = pages;
async_extent->nr_pages = nr_pages;
+ async_extent->compress_type = compress_type;
list_add_tail(&async_extent->list, &cow->extents);
return 0;
}
@@ -332,6 +335,7 @@ static noinline int compress_file_range(struct inode *inode,
unsigned long max_uncompressed = 128 * 1024;
int i;
int will_compress;
+ int compress_type = root->fs_info->compress_type;
actual_end = min_t(u64, isize, end + 1);
again:
@@ -381,12 +385,16 @@ again:
WARN_ON(pages);
pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
- ret = btrfs_zlib_compress_pages(inode->i_mapping, start,
- total_compressed, pages,
- nr_pages, &nr_pages_ret,
- &total_in,
- &total_compressed,
- max_compressed);
+ if (BTRFS_I(inode)->force_compress)
+ compress_type = BTRFS_I(inode)->force_compress;
+
+ ret = btrfs_compress_pages(compress_type,
+ inode->i_mapping, start,
+ total_compressed, pages,
+ nr_pages, &nr_pages_ret,
+ &total_in,
+ &total_compressed,
+ max_compressed);
if (!ret) {
unsigned long offset = total_compressed &
@@ -493,7 +501,8 @@ again:
* and will submit them to the elevator.
*/
add_async_extent(async_cow, start, num_bytes,
- total_compressed, pages, nr_pages_ret);
+ total_compressed, pages, nr_pages_ret,
+ compress_type);
if (start + num_bytes < end) {
start += num_bytes;
@@ -515,7 +524,8 @@ cleanup_and_bail_uncompressed:
__set_page_dirty_nobuffers(locked_page);
/* unlocked later on in the async handlers */
}
- add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0);
+ add_async_extent(async_cow, start, end - start + 1,
+ 0, NULL, 0, BTRFS_COMPRESS_NONE);
*num_added += 1;
}
@@ -640,6 +650,7 @@ retry:
em->block_start = ins.objectid;
em->block_len = ins.offset;
em->bdev = root->fs_info->fs_devices->latest_bdev;
+ em->compress_type = async_extent->compress_type;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
@@ -656,11 +667,13 @@ retry:
async_extent->ram_size - 1, 0);
}
- ret = btrfs_add_ordered_extent(inode, async_extent->start,
- ins.objectid,
- async_extent->ram_size,
- ins.offset,
- BTRFS_ORDERED_COMPRESSED);
+ ret = btrfs_add_ordered_extent_compress(inode,
+ async_extent->start,
+ ins.objectid,
+ async_extent->ram_size,
+ ins.offset,
+ BTRFS_ORDERED_COMPRESSED,
+ async_extent->compress_type);
BUG_ON(ret);
/*
@@ -1670,7 +1683,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
struct btrfs_ordered_extent *ordered_extent = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_state *cached_state = NULL;
- int compressed = 0;
+ int compress_type = 0;
int ret;
bool nolock = false;
@@ -1711,9 +1724,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
- compressed = 1;
+ compress_type = ordered_extent->compress_type;
if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
- BUG_ON(compressed);
+ BUG_ON(compress_type);
ret = btrfs_mark_extent_written(trans, inode,
ordered_extent->file_offset,
ordered_extent->file_offset +
@@ -1727,7 +1740,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ordered_extent->disk_len,
ordered_extent->len,
ordered_extent->len,
- compressed, 0, 0,
+ compress_type, 0, 0,
BTRFS_FILE_EXTENT_REG);
unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
ordered_extent->file_offset,
@@ -1829,6 +1842,8 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
logical = em->block_start;
failrec->bio_flags = EXTENT_BIO_COMPRESSED;
+ extent_set_compress_type(&failrec->bio_flags,
+ em->compress_type);
}
failrec->logical = logical;
free_extent_map(em);
@@ -3671,8 +3686,12 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
int err;
+ if (btrfs_root_readonly(root))
+ return -EROFS;
+
err = inode_change_ok(inode, attr);
if (err)
return err;
@@ -4928,8 +4947,10 @@ static noinline int uncompress_inline(struct btrfs_path *path,
size_t max_size;
unsigned long inline_size;
unsigned long ptr;
+ int compress_type;
WARN_ON(pg_offset != 0);
+ compress_type = btrfs_file_extent_compression(leaf, item);
max_size = btrfs_file_extent_ram_bytes(leaf, item);
inline_size = btrfs_file_extent_inline_item_len(leaf,
btrfs_item_nr(leaf, path->slots[0]));
@@ -4939,8 +4960,8 @@ static noinline int uncompress_inline(struct btrfs_path *path,
read_extent_buffer(leaf, tmp, ptr, inline_size);
max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size);
- ret = btrfs_zlib_decompress(tmp, page, extent_offset,
- inline_size, max_size);
+ ret = btrfs_decompress(compress_type, tmp, page,
+ extent_offset, inline_size, max_size);
if (ret) {
char *kaddr = kmap_atomic(page, KM_USER0);
unsigned long copy_size = min_t(u64,
@@ -4982,7 +5003,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_trans_handle *trans = NULL;
- int compressed;
+ int compress_type;
again:
read_lock(&em_tree->lock);
@@ -5041,7 +5062,7 @@ again:
found_type = btrfs_file_extent_type(leaf, item);
extent_start = found_key.offset;
- compressed = btrfs_file_extent_compression(leaf, item);
+ compress_type = btrfs_file_extent_compression(leaf, item);
if (found_type == BTRFS_FILE_EXTENT_REG ||
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
extent_end = extent_start +
@@ -5087,8 +5108,9 @@ again:
em->block_start = EXTENT_MAP_HOLE;
goto insert;
}
- if (compressed) {
+ if (compress_type != BTRFS_COMPRESS_NONE) {
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+ em->compress_type = compress_type;
em->block_start = bytenr;
em->block_len = btrfs_file_extent_disk_num_bytes(leaf,
item);
@@ -5122,12 +5144,14 @@ again:
em->len = (copy_size + root->sectorsize - 1) &
~((u64)root->sectorsize - 1);
em->orig_start = EXTENT_MAP_INLINE;
- if (compressed)
+ if (compress_type) {
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+ em->compress_type = compress_type;
+ }
ptr = btrfs_file_extent_inline_start(item) + extent_offset;
if (create == 0 && !PageUptodate(page)) {
- if (btrfs_file_extent_compression(leaf, item) ==
- BTRFS_COMPRESS_ZLIB) {
+ if (btrfs_file_extent_compression(leaf, item) !=
+ BTRFS_COMPRESS_NONE) {
ret = uncompress_inline(path, inode, page,
pg_offset,
extent_offset, item);
@@ -6477,7 +6501,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->ordered_data_close = 0;
ei->orphan_meta_reserved = 0;
ei->dummy_inode = 0;
- ei->force_compress = 0;
+ ei->force_compress = BTRFS_COMPRESS_NONE;
inode = &ei->vfs_inode;
extent_map_tree_init(&ei->extent_tree, GFP_NOFS);
@@ -7098,116 +7122,6 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
min_size, actual_len, alloc_hint, trans);
}
-static long btrfs_fallocate(struct inode *inode, int mode,
- loff_t offset, loff_t len)
-{
- struct extent_state *cached_state = NULL;
- u64 cur_offset;
- u64 last_byte;
- u64 alloc_start;
- u64 alloc_end;
- u64 alloc_hint = 0;
- u64 locked_end;
- u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
- struct extent_map *em;
- int ret;
-
- alloc_start = offset & ~mask;
- alloc_end = (offset + len + mask) & ~mask;
-
- /* We only support the FALLOC_FL_KEEP_SIZE mode */
- if (mode && (mode != FALLOC_FL_KEEP_SIZE))
- return -EOPNOTSUPP;
-
- /*
- * wait for ordered IO before we have any locks. We'll loop again
- * below with the locks held.
- */
- btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
-
- mutex_lock(&inode->i_mutex);
- ret = inode_newsize_ok(inode, alloc_end);
- if (ret)
- goto out;
-
- if (alloc_start > inode->i_size) {
- ret = btrfs_cont_expand(inode, alloc_start);
- if (ret)
- goto out;
- }
-
- ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
- if (ret)
- goto out;
-
- locked_end = alloc_end - 1;
- while (1) {
- struct btrfs_ordered_extent *ordered;
-
- /* the extent lock is ordered inside the running
- * transaction
- */
- lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
- locked_end, 0, &cached_state, GFP_NOFS);
- ordered = btrfs_lookup_first_ordered_extent(inode,
- alloc_end - 1);
- if (ordered &&
- ordered->file_offset + ordered->len > alloc_start &&
- ordered->file_offset < alloc_end) {
- btrfs_put_ordered_extent(ordered);
- unlock_extent_cached(&BTRFS_I(inode)->io_tree,
- alloc_start, locked_end,
- &cached_state, GFP_NOFS);
- /*
- * we can't wait on the range with the transaction
- * running or with the extent lock held
- */
- btrfs_wait_ordered_range(inode, alloc_start,
- alloc_end - alloc_start);
- } else {
- if (ordered)
- btrfs_put_ordered_extent(ordered);
- break;
- }
- }
-
- cur_offset = alloc_start;
- while (1) {
- em = btrfs_get_extent(inode, NULL, 0, cur_offset,
- alloc_end - cur_offset, 0);
- BUG_ON(IS_ERR(em) || !em);
- last_byte = min(extent_map_end(em), alloc_end);
- last_byte = (last_byte + mask) & ~mask;
- if (em->block_start == EXTENT_MAP_HOLE ||
- (cur_offset >= inode->i_size &&
- !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
- ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
- last_byte - cur_offset,
- 1 << inode->i_blkbits,
- offset + len,
- &alloc_hint);
- if (ret < 0) {
- free_extent_map(em);
- break;
- }
- }
- free_extent_map(em);
-
- cur_offset = last_byte;
- if (cur_offset >= alloc_end) {
- ret = 0;
- break;
- }
- }
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
- &cached_state, GFP_NOFS);
-
- btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
-out:
- mutex_unlock(&inode->i_mutex);
- return ret;
-}
-
static int btrfs_set_page_dirty(struct page *page)
{
return __set_page_dirty_nobuffers(page);
@@ -7215,6 +7129,10 @@ static int btrfs_set_page_dirty(struct page *page)
static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+
+ if (btrfs_root_readonly(root) && (mask & MAY_WRITE))
+ return -EROFS;
if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
return -EACCES;
return generic_permission(inode, mask, flags, btrfs_check_acl);
@@ -7310,7 +7228,6 @@ static const struct inode_operations btrfs_file_inode_operations = {
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
.permission = btrfs_permission,
- .fallocate = btrfs_fallocate,
.fiemap = btrfs_fiemap,
};
static const struct inode_operations btrfs_special_inode_operations = {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index f87552a1d7ea..a506a22b522a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -147,6 +147,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
unsigned int flags, oldflags;
int ret;
+ if (btrfs_root_readonly(root))
+ return -EROFS;
+
if (copy_from_user(&flags, arg, sizeof(flags)))
return -EFAULT;
@@ -360,7 +363,8 @@ fail:
}
static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
- char *name, int namelen, u64 *async_transid)
+ char *name, int namelen, u64 *async_transid,
+ bool readonly)
{
struct inode *inode;
struct dentry *parent;
@@ -378,6 +382,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
btrfs_init_block_rsv(&pending_snapshot->block_rsv);
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
+ pending_snapshot->readonly = readonly;
trans = btrfs_start_transaction(root->fs_info->extent_root, 5);
if (IS_ERR(trans)) {
@@ -509,7 +514,7 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
static noinline int btrfs_mksubvol(struct path *parent,
char *name, int namelen,
struct btrfs_root *snap_src,
- u64 *async_transid)
+ u64 *async_transid, bool readonly)
{
struct inode *dir = parent->dentry->d_inode;
struct dentry *dentry;
@@ -541,7 +546,7 @@ static noinline int btrfs_mksubvol(struct path *parent,
if (snap_src) {
error = create_snapshot(snap_src, dentry,
- name, namelen, async_transid);
+ name, namelen, async_transid, readonly);
} else {
error = create_subvol(BTRFS_I(dir)->root, dentry,
name, namelen, async_transid);
@@ -638,9 +643,11 @@ static int btrfs_defrag_file(struct file *file,
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_ordered_extent *ordered;
struct page *page;
+ struct btrfs_super_block *disk_super;
unsigned long last_index;
unsigned long ra_pages = root->fs_info->bdi.ra_pages;
unsigned long total_read = 0;
+ u64 features;
u64 page_start;
u64 page_end;
u64 last_len = 0;
@@ -648,6 +655,14 @@ static int btrfs_defrag_file(struct file *file,
u64 defrag_end = 0;
unsigned long i;
int ret;
+ int compress_type = BTRFS_COMPRESS_ZLIB;
+
+ if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
+ if (range->compress_type > BTRFS_COMPRESS_TYPES)
+ return -EINVAL;
+ if (range->compress_type)
+ compress_type = range->compress_type;
+ }
if (inode->i_size == 0)
return 0;
@@ -683,7 +698,7 @@ static int btrfs_defrag_file(struct file *file,
total_read++;
mutex_lock(&inode->i_mutex);
if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
- BTRFS_I(inode)->force_compress = 1;
+ BTRFS_I(inode)->force_compress = compress_type;
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
if (ret)
@@ -781,10 +796,17 @@ loop_unlock:
atomic_dec(&root->fs_info->async_submit_draining);
mutex_lock(&inode->i_mutex);
- BTRFS_I(inode)->force_compress = 0;
+ BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
mutex_unlock(&inode->i_mutex);
}
+ disk_super = &root->fs_info->super_copy;
+ features = btrfs_super_incompat_flags(disk_super);
+ if (range->compress_type == BTRFS_COMPRESS_LZO) {
+ features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
+ btrfs_set_super_incompat_flags(disk_super, features);
+ }
+
return 0;
err_reservations:
@@ -901,7 +923,8 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
char *name,
unsigned long fd,
int subvol,
- u64 *transid)
+ u64 *transid,
+ bool readonly)
{
struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
struct file *src_file;
@@ -919,7 +942,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
if (subvol) {
ret = btrfs_mksubvol(&file->f_path, name, namelen,
- NULL, transid);
+ NULL, transid, readonly);
} else {
struct inode *src_inode;
src_file = fget(fd);
@@ -938,7 +961,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
}
ret = btrfs_mksubvol(&file->f_path, name, namelen,
BTRFS_I(src_inode)->root,
- transid);
+ transid, readonly);
fput(src_file);
}
out:
@@ -946,58 +969,139 @@ out:
}
static noinline int btrfs_ioctl_snap_create(struct file *file,
- void __user *arg, int subvol,
- int v2)
+ void __user *arg, int subvol)
{
- struct btrfs_ioctl_vol_args *vol_args = NULL;
- struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL;
- char *name;
- u64 fd;
+ struct btrfs_ioctl_vol_args *vol_args;
int ret;
- if (v2) {
- u64 transid = 0;
- u64 *ptr = NULL;
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
+ vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- vol_args_v2 = memdup_user(arg, sizeof(*vol_args_v2));
- if (IS_ERR(vol_args_v2))
- return PTR_ERR(vol_args_v2);
+ ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
+ vol_args->fd, subvol,
+ NULL, false);
- if (vol_args_v2->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) {
- ret = -EINVAL;
- goto out;
- }
-
- name = vol_args_v2->name;
- fd = vol_args_v2->fd;
- vol_args_v2->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
+ kfree(vol_args);
+ return ret;
+}
- if (vol_args_v2->flags & BTRFS_SUBVOL_CREATE_ASYNC)
- ptr = &transid;
+static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
+ void __user *arg, int subvol)
+{
+ struct btrfs_ioctl_vol_args_v2 *vol_args;
+ int ret;
+ u64 transid = 0;
+ u64 *ptr = NULL;
+ bool readonly = false;
- ret = btrfs_ioctl_snap_create_transid(file, name, fd,
- subvol, ptr);
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
+ vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
- if (ret == 0 && ptr &&
- copy_to_user(arg +
- offsetof(struct btrfs_ioctl_vol_args_v2,
- transid), ptr, sizeof(*ptr)))
- ret = -EFAULT;
- } else {
- vol_args = memdup_user(arg, sizeof(*vol_args));
- if (IS_ERR(vol_args))
- return PTR_ERR(vol_args);
- name = vol_args->name;
- fd = vol_args->fd;
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
-
- ret = btrfs_ioctl_snap_create_transid(file, name, fd,
- subvol, NULL);
+ if (vol_args->flags &
+ ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) {
+ ret = -EOPNOTSUPP;
+ goto out;
}
+
+ if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC)
+ ptr = &transid;
+ if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
+ readonly = true;
+
+ ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
+ vol_args->fd, subvol,
+ ptr, readonly);
+
+ if (ret == 0 && ptr &&
+ copy_to_user(arg +
+ offsetof(struct btrfs_ioctl_vol_args_v2,
+ transid), ptr, sizeof(*ptr)))
+ ret = -EFAULT;
out:
kfree(vol_args);
- kfree(vol_args_v2);
+ return ret;
+}
+static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
+ void __user *arg)
+{
+ struct inode *inode = fdentry(file)->d_inode;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ int ret = 0;
+ u64 flags = 0;
+
+ if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID)
+ return -EINVAL;
+
+ down_read(&root->fs_info->subvol_sem);
+ if (btrfs_root_readonly(root))
+ flags |= BTRFS_SUBVOL_RDONLY;
+ up_read(&root->fs_info->subvol_sem);
+
+ if (copy_to_user(arg, &flags, sizeof(flags)))
+ ret = -EFAULT;
+
+ return ret;
+}
+
+static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
+ void __user *arg)
+{
+ struct inode *inode = fdentry(file)->d_inode;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_trans_handle *trans;
+ u64 root_flags;
+ u64 flags;
+ int ret = 0;
+
+ if (root->fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
+ if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID)
+ return -EINVAL;
+
+ if (copy_from_user(&flags, arg, sizeof(flags)))
+ return -EFAULT;
+
+ if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC)
+ return -EINVAL;
+
+ if (flags & ~BTRFS_SUBVOL_RDONLY)
+ return -EOPNOTSUPP;
+
+ down_write(&root->fs_info->subvol_sem);
+
+ /* nothing to do */
+ if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root))
+ goto out;
+
+ root_flags = btrfs_root_flags(&root->root_item);
+ if (flags & BTRFS_SUBVOL_RDONLY)
+ btrfs_set_root_flags(&root->root_item,
+ root_flags | BTRFS_ROOT_SUBVOL_RDONLY);
+ else
+ btrfs_set_root_flags(&root->root_item,
+ root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
+
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out_reset;
+ }
+
+ ret = btrfs_update_root(trans, root,
+ &root->root_key, &root->root_item);
+
+ btrfs_commit_transaction(trans, root);
+out_reset:
+ if (ret)
+ btrfs_set_root_flags(&root->root_item, root_flags);
+out:
+ up_write(&root->fs_info->subvol_sem);
return ret;
}
@@ -1509,6 +1613,9 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
struct btrfs_ioctl_defrag_range_args *range;
int ret;
+ if (btrfs_root_readonly(root))
+ return -EROFS;
+
ret = mnt_want_write(file->f_path.mnt);
if (ret)
return ret;
@@ -1637,6 +1744,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
return -EINVAL;
+ if (btrfs_root_readonly(root))
+ return -EROFS;
+
ret = mnt_want_write(file->f_path.mnt);
if (ret)
return ret;
@@ -1958,6 +2068,10 @@ static long btrfs_ioctl_trans_start(struct file *file)
if (file->private_data)
goto out;
+ ret = -EROFS;
+ if (btrfs_root_readonly(root))
+ goto out;
+
ret = mnt_want_write(file->f_path.mnt);
if (ret)
goto out;
@@ -2257,13 +2371,17 @@ long btrfs_ioctl(struct file *file, unsigned int
case FS_IOC_GETVERSION:
return btrfs_ioctl_getversion(file, argp);
case BTRFS_IOC_SNAP_CREATE:
- return btrfs_ioctl_snap_create(file, argp, 0, 0);
+ return btrfs_ioctl_snap_create(file, argp, 0);
case BTRFS_IOC_SNAP_CREATE_V2:
- return btrfs_ioctl_snap_create(file, argp, 0, 1);
+ return btrfs_ioctl_snap_create_v2(file, argp, 0);
case BTRFS_IOC_SUBVOL_CREATE:
- return btrfs_ioctl_snap_create(file, argp, 1, 0);
+ return btrfs_ioctl_snap_create(file, argp, 1);
case BTRFS_IOC_SNAP_DESTROY:
return btrfs_ioctl_snap_destroy(file, argp);
+ case BTRFS_IOC_SUBVOL_GETFLAGS:
+ return btrfs_ioctl_subvol_getflags(file, argp);
+ case BTRFS_IOC_SUBVOL_SETFLAGS:
+ return btrfs_ioctl_subvol_setflags(file, argp);
case BTRFS_IOC_DEFAULT_SUBVOL:
return btrfs_ioctl_default_subvol(file, argp);
case BTRFS_IOC_DEFRAG:
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index c344d12c646b..8fb382167b13 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -31,6 +31,7 @@ struct btrfs_ioctl_vol_args {
};
#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
+#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
#define BTRFS_SUBVOL_NAME_MAX 4039
struct btrfs_ioctl_vol_args_v2 {
@@ -133,8 +134,15 @@ struct btrfs_ioctl_defrag_range_args {
*/
__u32 extent_thresh;
+ /*
+ * which compression method to use if turning on compression
+ * for this defrag operation. If unspecified, zlib will
+ * be used
+ */
+ __u32 compress_type;
+
/* spare for later */
- __u32 unused[5];
+ __u32 unused[4];
};
struct btrfs_ioctl_space_info {
@@ -193,4 +201,6 @@ struct btrfs_ioctl_space_args {
#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64)
+#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
#endif
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
new file mode 100644
index 000000000000..cc9b450399df
--- /dev/null
+++ b/fs/btrfs/lzo.c
@@ -0,0 +1,420 @@
+/*
+ * Copyright (C) 2008 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/pagemap.h>
+#include <linux/bio.h>
+#include <linux/lzo.h>
+#include "compression.h"
+
+#define LZO_LEN 4
+
+struct workspace {
+ void *mem;
+ void *buf; /* where compressed data goes */
+ void *cbuf; /* where decompressed data goes */
+ struct list_head list;
+};
+
+static void lzo_free_workspace(struct list_head *ws)
+{
+ struct workspace *workspace = list_entry(ws, struct workspace, list);
+
+ vfree(workspace->buf);
+ vfree(workspace->cbuf);
+ vfree(workspace->mem);
+ kfree(workspace);
+}
+
+static struct list_head *lzo_alloc_workspace(void)
+{
+ struct workspace *workspace;
+
+ workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
+ if (!workspace)
+ return ERR_PTR(-ENOMEM);
+
+ workspace->mem = vmalloc(LZO1X_MEM_COMPRESS);
+ workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE));
+ workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE));
+ if (!workspace->mem || !workspace->buf || !workspace->cbuf)
+ goto fail;
+
+ INIT_LIST_HEAD(&workspace->list);
+
+ return &workspace->list;
+fail:
+ lzo_free_workspace(&workspace->list);
+ return ERR_PTR(-ENOMEM);
+}
+
+static inline void write_compress_length(char *buf, size_t len)
+{
+ __le32 dlen;
+
+ dlen = cpu_to_le32(len);
+ memcpy(buf, &dlen, LZO_LEN);
+}
+
+static inline size_t read_compress_length(char *buf)
+{
+ __le32 dlen;
+
+ memcpy(&dlen, buf, LZO_LEN);
+ return le32_to_cpu(dlen);
+}
+
+static int lzo_compress_pages(struct list_head *ws,
+ struct address_space *mapping,
+ u64 start, unsigned long len,
+ struct page **pages,
+ unsigned long nr_dest_pages,
+ unsigned long *out_pages,
+ unsigned long *total_in,
+ unsigned long *total_out,
+ unsigned long max_out)
+{
+ struct workspace *workspace = list_entry(ws, struct workspace, list);
+ int ret = 0;
+ char *data_in;
+ char *cpage_out;
+ int nr_pages = 0;
+ struct page *in_page = NULL;
+ struct page *out_page = NULL;
+ unsigned long bytes_left;
+
+ size_t in_len;
+ size_t out_len;
+ char *buf;
+ unsigned long tot_in = 0;
+ unsigned long tot_out = 0;
+ unsigned long pg_bytes_left;
+ unsigned long out_offset;
+ unsigned long bytes;
+
+ *out_pages = 0;
+ *total_out = 0;
+ *total_in = 0;
+
+ in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+ data_in = kmap(in_page);
+
+ /*
+ * store the size of all chunks of compressed data in
+ * the first 4 bytes
+ */
+ out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ if (out_page == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ cpage_out = kmap(out_page);
+ out_offset = LZO_LEN;
+ tot_out = LZO_LEN;
+ pages[0] = out_page;
+ nr_pages = 1;
+ pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;
+
+ /* compress at most one page of data each time */
+ in_len = min(len, PAGE_CACHE_SIZE);
+ while (tot_in < len) {
+ ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf,
+ &out_len, workspace->mem);
+ if (ret != LZO_E_OK) {
+ printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
+ ret);
+ ret = -1;
+ goto out;
+ }
+
+ /* store the size of this chunk of compressed data */
+ write_compress_length(cpage_out + out_offset, out_len);
+ tot_out += LZO_LEN;
+ out_offset += LZO_LEN;
+ pg_bytes_left -= LZO_LEN;
+
+ tot_in += in_len;
+ tot_out += out_len;
+
+ /* copy bytes from the working buffer into the pages */
+ buf = workspace->cbuf;
+ while (out_len) {
+ bytes = min_t(unsigned long, pg_bytes_left, out_len);
+
+ memcpy(cpage_out + out_offset, buf, bytes);
+
+ out_len -= bytes;
+ pg_bytes_left -= bytes;
+ buf += bytes;
+ out_offset += bytes;
+
+ /*
+ * we need another page for writing out.
+ *
+ * Note if there's less than 4 bytes left, we just
+ * skip to a new page.
+ */
+ if ((out_len == 0 && pg_bytes_left < LZO_LEN) ||
+ pg_bytes_left == 0) {
+ if (pg_bytes_left) {
+ memset(cpage_out + out_offset, 0,
+ pg_bytes_left);
+ tot_out += pg_bytes_left;
+ }
+
+ /* we're done, don't allocate new page */
+ if (out_len == 0 && tot_in >= len)
+ break;
+
+ kunmap(out_page);
+ if (nr_pages == nr_dest_pages) {
+ out_page = NULL;
+ ret = -1;
+ goto out;
+ }
+
+ out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ if (out_page == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ cpage_out = kmap(out_page);
+ pages[nr_pages++] = out_page;
+
+ pg_bytes_left = PAGE_CACHE_SIZE;
+ out_offset = 0;
+ }
+ }
+
+ /* we're making it bigger, give up */
+ if (tot_in > 8192 && tot_in < tot_out)
+ goto out;
+
+ /* we're all done */
+ if (tot_in >= len)
+ break;
+
+ if (tot_out > max_out)
+ break;
+
+ bytes_left = len - tot_in;
+ kunmap(in_page);
+ page_cache_release(in_page);
+
+ start += PAGE_CACHE_SIZE;
+ in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+ data_in = kmap(in_page);
+ in_len = min(bytes_left, PAGE_CACHE_SIZE);
+ }
+
+ if (tot_out > tot_in)
+ goto out;
+
+ /* store the size of all chunks of compressed data */
+ cpage_out = kmap(pages[0]);
+ write_compress_length(cpage_out, tot_out);
+
+ kunmap(pages[0]);
+
+ ret = 0;
+ *total_out = tot_out;
+ *total_in = tot_in;
+out:
+ *out_pages = nr_pages;
+ if (out_page)
+ kunmap(out_page);
+
+ if (in_page) {
+ kunmap(in_page);
+ page_cache_release(in_page);
+ }
+
+ return ret;
+}
+
+static int lzo_decompress_biovec(struct list_head *ws,
+ struct page **pages_in,
+ u64 disk_start,
+ struct bio_vec *bvec,
+ int vcnt,
+ size_t srclen)
+{
+ struct workspace *workspace = list_entry(ws, struct workspace, list);
+ int ret = 0, ret2;
+ char *data_in;
+ unsigned long page_in_index = 0;
+ unsigned long page_out_index = 0;
+ unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
+ PAGE_CACHE_SIZE;
+ unsigned long buf_start;
+ unsigned long buf_offset = 0;
+ unsigned long bytes;
+ unsigned long working_bytes;
+ unsigned long pg_offset;
+
+ size_t in_len;
+ size_t out_len;
+ unsigned long in_offset;
+ unsigned long in_page_bytes_left;
+ unsigned long tot_in;
+ unsigned long tot_out;
+ unsigned long tot_len;
+ char *buf;
+
+ data_in = kmap(pages_in[0]);
+ tot_len = read_compress_length(data_in);
+
+ tot_in = LZO_LEN;
+ in_offset = LZO_LEN;
+ tot_len = min_t(size_t, srclen, tot_len);
+ in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;
+
+ tot_out = 0;
+ pg_offset = 0;
+
+ while (tot_in < tot_len) {
+ in_len = read_compress_length(data_in + in_offset);
+ in_page_bytes_left -= LZO_LEN;
+ in_offset += LZO_LEN;
+ tot_in += LZO_LEN;
+
+ tot_in += in_len;
+ working_bytes = in_len;
+
+ /* fast path: avoid using the working buffer */
+ if (in_page_bytes_left >= in_len) {
+ buf = data_in + in_offset;
+ bytes = in_len;
+ goto cont;
+ }
+
+ /* copy bytes from the pages into the working buffer */
+ buf = workspace->cbuf;
+ buf_offset = 0;
+ while (working_bytes) {
+ bytes = min(working_bytes, in_page_bytes_left);
+
+ memcpy(buf + buf_offset, data_in + in_offset, bytes);
+ buf_offset += bytes;
+cont:
+ working_bytes -= bytes;
+ in_page_bytes_left -= bytes;
+ in_offset += bytes;
+
+ /* check if we need to pick another page */
+ if ((working_bytes == 0 && in_page_bytes_left < LZO_LEN)
+ || in_page_bytes_left == 0) {
+ tot_in += in_page_bytes_left;
+
+ if (working_bytes == 0 && tot_in >= tot_len)
+ break;
+
+ kunmap(pages_in[page_in_index]);
+ page_in_index++;
+ if (page_in_index >= total_pages_in) {
+ ret = -1;
+ data_in = NULL;
+ goto done;
+ }
+ data_in = kmap(pages_in[page_in_index]);
+
+ in_page_bytes_left = PAGE_CACHE_SIZE;
+ in_offset = 0;
+ }
+ }
+
+ out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
+ ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
+ &out_len);
+ if (ret != LZO_E_OK) {
+ printk(KERN_WARNING "btrfs decompress failed\n");
+ ret = -1;
+ break;
+ }
+
+ buf_start = tot_out;
+ tot_out += out_len;
+
+ ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start,
+ tot_out, disk_start,
+ bvec, vcnt,
+ &page_out_index, &pg_offset);
+ if (ret2 == 0)
+ break;
+ }
+done:
+ if (data_in)
+ kunmap(pages_in[page_in_index]);
+ return ret;
+}
+
+static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
+ struct page *dest_page,
+ unsigned long start_byte,
+ size_t srclen, size_t destlen)
+{
+ struct workspace *workspace = list_entry(ws, struct workspace, list);
+ size_t in_len;
+ size_t out_len;
+ size_t tot_len;
+ int ret = 0;
+ char *kaddr;
+ unsigned long bytes;
+
+ BUG_ON(srclen < LZO_LEN);
+
+ tot_len = read_compress_length(data_in);
+ data_in += LZO_LEN;
+
+ in_len = read_compress_length(data_in);
+ data_in += LZO_LEN;
+
+ out_len = PAGE_CACHE_SIZE;
+ ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
+ if (ret != LZO_E_OK) {
+ printk(KERN_WARNING "btrfs decompress failed!\n");
+ ret = -1;
+ goto out;
+ }
+
+ if (out_len < start_byte) {
+ ret = -1;
+ goto out;
+ }
+
+ bytes = min_t(unsigned long, destlen, out_len - start_byte);
+
+ kaddr = kmap_atomic(dest_page, KM_USER0);
+ memcpy(kaddr, workspace->buf + start_byte, bytes);
+ kunmap_atomic(kaddr, KM_USER0);
+out:
+ return ret;
+}
+
+struct btrfs_compress_op btrfs_lzo_compress = {
+ .alloc_workspace = lzo_alloc_workspace,
+ .free_workspace = lzo_free_workspace,
+ .compress_pages = lzo_compress_pages,
+ .decompress_biovec = lzo_decompress_biovec,
+ .decompress = lzo_decompress,
+};
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index ae7737e352c9..2b61e1ddcd99 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -172,7 +172,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
*/
static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len,
- int type, int dio)
+ int type, int dio, int compress_type)
{
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
@@ -189,6 +189,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
entry->disk_len = disk_len;
entry->bytes_left = len;
entry->inode = inode;
+ entry->compress_type = compress_type;
if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
set_bit(type, &entry->flags);
@@ -220,14 +221,25 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type)
{
return __btrfs_add_ordered_extent(inode, file_offset, start, len,
- disk_len, type, 0);
+ disk_len, type, 0,
+ BTRFS_COMPRESS_NONE);
}
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type)
{
return __btrfs_add_ordered_extent(inode, file_offset, start, len,
- disk_len, type, 1);
+ disk_len, type, 1,
+ BTRFS_COMPRESS_NONE);
+}
+
+int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
+ u64 start, u64 len, u64 disk_len,
+ int type, int compress_type)
+{
+ return __btrfs_add_ordered_extent(inode, file_offset, start, len,
+ disk_len, type, 0,
+ compress_type);
}
/*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 61dca83119dd..ff1f69aa1883 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -68,7 +68,7 @@ struct btrfs_ordered_sum {
#define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */
-#define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */
+#define BTRFS_ORDERED_COMPRESSED 3 /* writing a zlib compressed extent */
#define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */
@@ -93,6 +93,9 @@ struct btrfs_ordered_extent {
/* flags (described above) */
unsigned long flags;
+ /* compression algorithm */
+ int compress_type;
+
/* reference count */
atomic_t refs;
@@ -148,6 +151,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type);
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type);
+int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
+ u64 start, u64 len, u64 disk_len,
+ int type, int compress_type);
int btrfs_add_ordered_sum(struct inode *inode,
struct btrfs_ordered_extent *entry,
struct btrfs_ordered_sum *sum);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 22acdaa78ce1..b2130c46fdb5 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -54,6 +54,90 @@
static const struct super_operations btrfs_super_ops;
+static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
+ char nbuf[16])
+{
+ char *errstr = NULL;
+
+ switch (errno) {
+ case -EIO:
+ errstr = "IO failure";
+ break;
+ case -ENOMEM:
+ errstr = "Out of memory";
+ break;
+ case -EROFS:
+ errstr = "Readonly filesystem";
+ break;
+ default:
+ if (nbuf) {
+ if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
+ errstr = nbuf;
+ }
+ break;
+ }
+
+ return errstr;
+}
+
+static void __save_error_info(struct btrfs_fs_info *fs_info)
+{
+ /*
+ * today we only save the error info into ram. Long term we'll
+ * also send it down to the disk
+ */
+ fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR;
+}
+
+/* NOTE:
+ * We move write_super stuff at umount in order to avoid deadlock
+ * for umount hold all lock.
+ */
+static void save_error_info(struct btrfs_fs_info *fs_info)
+{
+ __save_error_info(fs_info);
+}
+
+/* btrfs handle error by forcing the filesystem readonly */
+static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
+{
+ struct super_block *sb = fs_info->sb;
+
+ if (sb->s_flags & MS_RDONLY)
+ return;
+
+ if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+ sb->s_flags |= MS_RDONLY;
+ printk(KERN_INFO "btrfs is forced readonly\n");
+ }
+}
+
+/*
+ * __btrfs_std_error decodes expected errors from the caller and
+ * invokes the approciate error response.
+ */
+void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
+ unsigned int line, int errno)
+{
+ struct super_block *sb = fs_info->sb;
+ char nbuf[16];
+ const char *errstr;
+
+ /*
+ * Special case: if the error is EROFS, and we're already
+ * under MS_RDONLY, then it is safe here.
+ */
+ if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
+ return;
+
+ errstr = btrfs_decode_error(fs_info, errno, nbuf);
+ printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n",
+ sb->s_id, function, line, errstr);
+ save_error_info(fs_info);
+
+ btrfs_handle_error(fs_info);
+}
+
static void btrfs_put_super(struct super_block *sb)
{
struct btrfs_root *root = btrfs_sb(sb);
@@ -69,9 +153,9 @@ enum {
Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
- Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
- Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err,
- Opt_user_subvol_rm_allowed,
+ Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
+ Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
+ Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err,
};
static match_table_t tokens = {
@@ -86,7 +170,9 @@ static match_table_t tokens = {
{Opt_alloc_start, "alloc_start=%s"},
{Opt_thread_pool, "thread_pool=%d"},
{Opt_compress, "compress"},
+ {Opt_compress_type, "compress=%s"},
{Opt_compress_force, "compress-force"},
+ {Opt_compress_force_type, "compress-force=%s"},
{Opt_ssd, "ssd"},
{Opt_ssd_spread, "ssd_spread"},
{Opt_nossd, "nossd"},
@@ -112,6 +198,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
char *p, *num, *orig;
int intarg;
int ret = 0;
+ char *compress_type;
+ bool compress_force = false;
if (!options)
return 0;
@@ -154,14 +242,32 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
btrfs_set_opt(info->mount_opt, NODATACOW);
btrfs_set_opt(info->mount_opt, NODATASUM);
break;
- case Opt_compress:
- printk(KERN_INFO "btrfs: use compression\n");
- btrfs_set_opt(info->mount_opt, COMPRESS);
- break;
case Opt_compress_force:
- printk(KERN_INFO "btrfs: forcing compression\n");
- btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
+ case Opt_compress_force_type:
+ compress_force = true;
+ case Opt_compress:
+ case Opt_compress_type:
+ if (token == Opt_compress ||
+ token == Opt_compress_force ||
+ strcmp(args[0].from, "zlib") == 0) {
+ compress_type = "zlib";
+ info->compress_type = BTRFS_COMPRESS_ZLIB;
+ } else if (strcmp(args[0].from, "lzo") == 0) {
+ compress_type = "lzo";
+ info->compress_type = BTRFS_COMPRESS_LZO;
+ } else {
+ ret = -EINVAL;
+ goto out;
+ }
+
btrfs_set_opt(info->mount_opt, COMPRESS);
+ if (compress_force) {
+ btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
+ pr_info("btrfs: force %s compression\n",
+ compress_type);
+ } else
+ pr_info("btrfs: use %s compression\n",
+ compress_type);
break;
case Opt_ssd:
printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
@@ -753,6 +859,127 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
return 0;
}
+/*
+ * The helper to calc the free space on the devices that can be used to store
+ * file data.
+ */
+static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_device_info *devices_info;
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ struct btrfs_device *device;
+ u64 skip_space;
+ u64 type;
+ u64 avail_space;
+ u64 used_space;
+ u64 min_stripe_size;
+ int min_stripes = 1;
+ int i = 0, nr_devices;
+ int ret;
+
+ nr_devices = fs_info->fs_devices->rw_devices;
+ BUG_ON(!nr_devices);
+
+ devices_info = kmalloc(sizeof(*devices_info) * nr_devices,
+ GFP_NOFS);
+ if (!devices_info)
+ return -ENOMEM;
+
+ /* calc min stripe number for data space alloction */
+ type = btrfs_get_alloc_profile(root, 1);
+ if (type & BTRFS_BLOCK_GROUP_RAID0)
+ min_stripes = 2;
+ else if (type & BTRFS_BLOCK_GROUP_RAID1)
+ min_stripes = 2;
+ else if (type & BTRFS_BLOCK_GROUP_RAID10)
+ min_stripes = 4;
+
+ if (type & BTRFS_BLOCK_GROUP_DUP)
+ min_stripe_size = 2 * BTRFS_STRIPE_LEN;
+ else
+ min_stripe_size = BTRFS_STRIPE_LEN;
+
+ list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
+ if (!device->in_fs_metadata)
+ continue;
+
+ avail_space = device->total_bytes - device->bytes_used;
+
+ /* align with stripe_len */
+ do_div(avail_space, BTRFS_STRIPE_LEN);
+ avail_space *= BTRFS_STRIPE_LEN;
+
+ /*
+ * In order to avoid overwritting the superblock on the drive,
+ * btrfs starts at an offset of at least 1MB when doing chunk
+ * allocation.
+ */
+ skip_space = 1024 * 1024;
+
+ /* user can set the offset in fs_info->alloc_start. */
+ if (fs_info->alloc_start + BTRFS_STRIPE_LEN <=
+ device->total_bytes)
+ skip_space = max(fs_info->alloc_start, skip_space);
+
+ /*
+ * btrfs can not use the free space in [0, skip_space - 1],
+ * we must subtract it from the total. In order to implement
+ * it, we account the used space in this range first.
+ */
+ ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1,
+ &used_space);
+ if (ret) {
+ kfree(devices_info);
+ return ret;
+ }
+
+ /* calc the free space in [0, skip_space - 1] */
+ skip_space -= used_space;
+
+ /*
+ * we can use the free space in [0, skip_space - 1], subtract
+ * it from the total.
+ */
+ if (avail_space && avail_space >= skip_space)
+ avail_space -= skip_space;
+ else
+ avail_space = 0;
+
+ if (avail_space < min_stripe_size)
+ continue;
+
+ devices_info[i].dev = device;
+ devices_info[i].max_avail = avail_space;
+
+ i++;
+ }
+
+ nr_devices = i;
+
+ btrfs_descending_sort_devices(devices_info, nr_devices);
+
+ i = nr_devices - 1;
+ avail_space = 0;
+ while (nr_devices >= min_stripes) {
+ if (devices_info[i].max_avail >= min_stripe_size) {
+ int j;
+ u64 alloc_size;
+
+ avail_space += devices_info[i].max_avail * min_stripes;
+ alloc_size = devices_info[i].max_avail;
+ for (j = i + 1 - min_stripes; j <= i; j++)
+ devices_info[j].max_avail -= alloc_size;
+ }
+ i--;
+ nr_devices--;
+ }
+
+ kfree(devices_info);
+ *free_bytes = avail_space;
+ return 0;
+}
+
static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct btrfs_root *root = btrfs_sb(dentry->d_sb);
@@ -760,17 +987,21 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
struct list_head *head = &root->fs_info->space_info;
struct btrfs_space_info *found;
u64 total_used = 0;
- u64 total_used_data = 0;
+ u64 total_free_data = 0;
int bits = dentry->d_sb->s_blocksize_bits;
__be32 *fsid = (__be32 *)root->fs_info->fsid;
+ int ret;
+ /* holding chunk_muext to avoid allocating new chunks */
+ mutex_lock(&root->fs_info->chunk_mutex);
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
- if (found->flags & (BTRFS_BLOCK_GROUP_METADATA |
- BTRFS_BLOCK_GROUP_SYSTEM))
- total_used_data += found->disk_total;
- else
- total_used_data += found->disk_used;
+ if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
+ total_free_data += found->disk_total - found->disk_used;
+ total_free_data -=
+ btrfs_account_ro_block_groups_free_space(found);
+ }
+
total_used += found->disk_used;
}
rcu_read_unlock();
@@ -778,9 +1009,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_namelen = BTRFS_NAME_LEN;
buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
buf->f_bfree = buf->f_blocks - (total_used >> bits);
- buf->f_bavail = buf->f_blocks - (total_used_data >> bits);
buf->f_bsize = dentry->d_sb->s_blocksize;
buf->f_type = BTRFS_SUPER_MAGIC;
+ buf->f_bavail = total_free_data;
+ ret = btrfs_calc_avail_data_space(root, &total_free_data);
+ if (ret) {
+ mutex_unlock(&root->fs_info->chunk_mutex);
+ return ret;
+ }
+ buf->f_bavail += total_free_data;
+ buf->f_bavail = buf->f_bavail >> bits;
+ mutex_unlock(&root->fs_info->chunk_mutex);
/* We treat it as constant endianness (it doesn't matter _which_)
because we want the fsid to come out the same whether mounted
@@ -897,10 +1136,14 @@ static int __init init_btrfs_fs(void)
if (err)
return err;
- err = btrfs_init_cachep();
+ err = btrfs_init_compress();
if (err)
goto free_sysfs;
+ err = btrfs_init_cachep();
+ if (err)
+ goto free_compress;
+
err = extent_io_init();
if (err)
goto free_cachep;
@@ -928,6 +1171,8 @@ free_extent_io:
extent_io_exit();
free_cachep:
btrfs_destroy_cachep();
+free_compress:
+ btrfs_exit_compress();
free_sysfs:
btrfs_exit_sysfs();
return err;
@@ -942,7 +1187,7 @@ static void __exit exit_btrfs_fs(void)
unregister_filesystem(&btrfs_fs_type);
btrfs_exit_sysfs();
btrfs_cleanup_fs_uuids();
- btrfs_zlib_exit();
+ btrfs_exit_compress();
}
module_init(init_btrfs_fs)
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index f50e931fc217..bae5c7b8bbe2 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -181,6 +181,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
struct btrfs_trans_handle *h;
struct btrfs_transaction *cur_trans;
int ret;
+
+ if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+ return ERR_PTR(-EROFS);
again:
h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
if (!h)
@@ -910,6 +913,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
u64 to_reserve = 0;
u64 index = 0;
u64 objectid;
+ u64 root_flags;
new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
if (!new_root_item) {
@@ -967,6 +971,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
+ root_flags = btrfs_root_flags(new_root_item);
+ if (pending->readonly)
+ root_flags |= BTRFS_ROOT_SUBVOL_RDONLY;
+ else
+ root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
+ btrfs_set_root_flags(new_root_item, root_flags);
+
old = btrfs_lock_root_node(root);
btrfs_cow_block(trans, root, old, NULL, 0, &old);
btrfs_set_lock_blocking(old);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index f104b57ad4ef..229a594cacd5 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -62,6 +62,7 @@ struct btrfs_pending_snapshot {
struct btrfs_block_rsv block_rsv;
/* extra metadata reseration for relocation */
int error;
+ bool readonly;
struct list_head list;
};
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1718e1a5c320..d158530233b7 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -22,6 +22,7 @@
#include <linux/blkdev.h>
#include <linux/random.h>
#include <linux/iocontext.h>
+#include <linux/capability.h>
#include <asm/div64.h>
#include "compat.h"
#include "ctree.h"
@@ -600,8 +601,10 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
set_blocksize(bdev, 4096);
bh = btrfs_read_dev_super(bdev);
- if (!bh)
+ if (!bh) {
+ ret = -EINVAL;
goto error_close;
+ }
disk_super = (struct btrfs_super_block *)bh->b_data;
devid = btrfs_stack_device_id(&disk_super->dev_item);
@@ -703,7 +706,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
goto error_close;
bh = btrfs_read_dev_super(bdev);
if (!bh) {
- ret = -EIO;
+ ret = -EINVAL;
goto error_close;
}
disk_super = (struct btrfs_super_block *)bh->b_data;
@@ -729,59 +732,167 @@ error:
return ret;
}
+/* helper to account the used device space in the range */
+int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
+ u64 end, u64 *length)
+{
+ struct btrfs_key key;
+ struct btrfs_root *root = device->dev_root;
+ struct btrfs_dev_extent *dev_extent;
+ struct btrfs_path *path;
+ u64 extent_end;
+ int ret;
+ int slot;
+ struct extent_buffer *l;
+
+ *length = 0;
+
+ if (start >= device->total_bytes)
+ return 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ path->reada = 2;
+
+ key.objectid = device->devid;
+ key.offset = start;
+ key.type = BTRFS_DEV_EXTENT_KEY;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = btrfs_previous_item(root, path, key.objectid, key.type);
+ if (ret < 0)
+ goto out;
+ }
+
+ while (1) {
+ l = path->nodes[0];
+ slot = path->slots[0];
+ if (slot >= btrfs_header_nritems(l)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret == 0)
+ continue;
+ if (ret < 0)
+ goto out;
+
+ break;
+ }
+ btrfs_item_key_to_cpu(l, &key, slot);
+
+ if (key.objectid < device->devid)
+ goto next;
+
+ if (key.objectid > device->devid)
+ break;
+
+ if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
+ goto next;
+
+ dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
+ extent_end = key.offset + btrfs_dev_extent_length(l,
+ dev_extent);
+ if (key.offset <= start && extent_end > end) {
+ *length = end - start + 1;
+ break;
+ } else if (key.offset <= start && extent_end > start)
+ *length += extent_end - start;
+ else if (key.offset > start && extent_end <= end)
+ *length += extent_end - key.offset;
+ else if (key.offset > start && key.offset <= end) {
+ *length += end - key.offset + 1;
+ break;
+ } else if (key.offset > end)
+ break;
+
+next:
+ path->slots[0]++;
+ }
+ ret = 0;
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
/*
+ * find_free_dev_extent - find free space in the specified device
+ * @trans: transaction handler
+ * @device: the device which we search the free space in
+ * @num_bytes: the size of the free space that we need
+ * @start: store the start of the free space.
+ * @len: the size of the free space. that we find, or the size of the max
+ * free space if we don't find suitable free space
+ *
* this uses a pretty simple search, the expectation is that it is
* called very infrequently and that a given device has a small number
* of extents
+ *
+ * @start is used to store the start of the free space if we find. But if we
+ * don't find suitable free space, it will be used to store the start position
+ * of the max free space.
+ *
+ * @len is used to store the size of the free space that we find.
+ * But if we don't find suitable free space, it is used to store the size of
+ * the max free space.
*/
int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 num_bytes,
- u64 *start, u64 *max_avail)
+ u64 *start, u64 *len)
{
struct btrfs_key key;
struct btrfs_root *root = device->dev_root;
- struct btrfs_dev_extent *dev_extent = NULL;
+ struct btrfs_dev_extent *dev_extent;
struct btrfs_path *path;
- u64 hole_size = 0;
- u64 last_byte = 0;
- u64 search_start = 0;
+ u64 hole_size;
+ u64 max_hole_start;
+ u64 max_hole_size;
+ u64 extent_end;
+ u64 search_start;
u64 search_end = device->total_bytes;
int ret;
- int slot = 0;
- int start_found;
+ int slot;
struct extent_buffer *l;
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->reada = 2;
- start_found = 0;
-
/* FIXME use last free of some kind */
/* we don't want to overwrite the superblock on the drive,
* so we make sure to start at an offset of at least 1MB
*/
- search_start = max((u64)1024 * 1024, search_start);
+ search_start = 1024 * 1024;
- if (root->fs_info->alloc_start + num_bytes <= device->total_bytes)
+ if (root->fs_info->alloc_start + num_bytes <= search_end)
search_start = max(root->fs_info->alloc_start, search_start);
+ max_hole_start = search_start;
+ max_hole_size = 0;
+
+ if (search_start >= search_end) {
+ ret = -ENOSPC;
+ goto error;
+ }
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ path->reada = 2;
+
key.objectid = device->devid;
key.offset = search_start;
key.type = BTRFS_DEV_EXTENT_KEY;
+
ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
if (ret < 0)
- goto error;
+ goto out;
if (ret > 0) {
ret = btrfs_previous_item(root, path, key.objectid, key.type);
if (ret < 0)
- goto error;
- if (ret > 0)
- start_found = 1;
+ goto out;
}
- l = path->nodes[0];
- btrfs_item_key_to_cpu(l, &key, path->slots[0]);
+
while (1) {
l = path->nodes[0];
slot = path->slots[0];
@@ -790,24 +901,9 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
if (ret == 0)
continue;
if (ret < 0)
- goto error;
-no_more_items:
- if (!start_found) {
- if (search_start >= search_end) {
- ret = -ENOSPC;
- goto error;
- }
- *start = search_start;
- start_found = 1;
- goto check_pending;
- }
- *start = last_byte > search_start ?
- last_byte : search_start;
- if (search_end <= *start) {
- ret = -ENOSPC;
- goto error;
- }
- goto check_pending;
+ goto out;
+
+ break;
}
btrfs_item_key_to_cpu(l, &key, slot);
@@ -815,48 +911,62 @@ no_more_items:
goto next;
if (key.objectid > device->devid)
- goto no_more_items;
+ break;
- if (key.offset >= search_start && key.offset > last_byte &&
- start_found) {
- if (last_byte < search_start)
- last_byte = search_start;
- hole_size = key.offset - last_byte;
+ if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
+ goto next;
- if (hole_size > *max_avail)
- *max_avail = hole_size;
+ if (key.offset > search_start) {
+ hole_size = key.offset - search_start;
- if (key.offset > last_byte &&
- hole_size >= num_bytes) {
- *start = last_byte;
- goto check_pending;
+ if (hole_size > max_hole_size) {
+ max_hole_start = search_start;
+ max_hole_size = hole_size;
+ }
+
+ /*
+ * If this free space is greater than which we need,
+ * it must be the max free space that we have found
+ * until now, so max_hole_start must point to the start
+ * of this free space and the length of this free space
+ * is stored in max_hole_size. Thus, we return
+ * max_hole_start and max_hole_size and go back to the
+ * caller.
+ */
+ if (hole_size >= num_bytes) {
+ ret = 0;
+ goto out;
}
}
- if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
- goto next;
- start_found = 1;
dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
- last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
+ extent_end = key.offset + btrfs_dev_extent_length(l,
+ dev_extent);
+ if (extent_end > search_start)
+ search_start = extent_end;
next:
path->slots[0]++;
cond_resched();
}
-check_pending:
- /* we have to make sure we didn't find an extent that has already
- * been allocated by the map tree or the original allocation
- */
- BUG_ON(*start < search_start);
- if (*start + num_bytes > search_end) {
- ret = -ENOSPC;
- goto error;
+ hole_size = search_end- search_start;
+ if (hole_size > max_hole_size) {
+ max_hole_start = search_start;
+ max_hole_size = hole_size;
}
- /* check for pending inserts here */
- ret = 0;
-error:
+ /* See above. */
+ if (hole_size < num_bytes)
+ ret = -ENOSPC;
+ else
+ ret = 0;
+
+out:
btrfs_free_path(path);
+error:
+ *start = max_hole_start;
+ if (len)
+ *len = max_hole_size;
return ret;
}
@@ -1196,7 +1306,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
set_blocksize(bdev, 4096);
bh = btrfs_read_dev_super(bdev);
if (!bh) {
- ret = -EIO;
+ ret = -EINVAL;
goto error_close;
}
disk_super = (struct btrfs_super_block *)bh->b_data;
@@ -1916,6 +2026,9 @@ int btrfs_balance(struct btrfs_root *dev_root)
if (dev_root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
mutex_lock(&dev_root->fs_info->volume_mutex);
dev_root = dev_root->fs_info->dev_root;
@@ -2154,66 +2267,67 @@ static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size,
return calc_size * num_stripes;
}
-static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_root *extent_root,
- struct map_lookup **map_ret,
- u64 *num_bytes, u64 *stripe_size,
- u64 start, u64 type)
+/* Used to sort the devices by max_avail(descending sort) */
+int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2)
{
- struct btrfs_fs_info *info = extent_root->fs_info;
- struct btrfs_device *device = NULL;
- struct btrfs_fs_devices *fs_devices = info->fs_devices;
- struct list_head *cur;
- struct map_lookup *map = NULL;
- struct extent_map_tree *em_tree;
- struct extent_map *em;
- struct list_head private_devs;
- int min_stripe_size = 1 * 1024 * 1024;
- u64 calc_size = 1024 * 1024 * 1024;
- u64 max_chunk_size = calc_size;
- u64 min_free;
- u64 avail;
- u64 max_avail = 0;
- u64 dev_offset;
- int num_stripes = 1;
- int min_stripes = 1;
- int sub_stripes = 0;
- int looped = 0;
- int ret;
- int index;
- int stripe_len = 64 * 1024;
+ if (((struct btrfs_device_info *)dev_info1)->max_avail >
+ ((struct btrfs_device_info *)dev_info2)->max_avail)
+ return -1;
+ else if (((struct btrfs_device_info *)dev_info1)->max_avail <
+ ((struct btrfs_device_info *)dev_info2)->max_avail)
+ return 1;
+ else
+ return 0;
+}
- if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
- (type & BTRFS_BLOCK_GROUP_DUP)) {
- WARN_ON(1);
- type &= ~BTRFS_BLOCK_GROUP_DUP;
- }
- if (list_empty(&fs_devices->alloc_list))
- return -ENOSPC;
+static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type,
+ int *num_stripes, int *min_stripes,
+ int *sub_stripes)
+{
+ *num_stripes = 1;
+ *min_stripes = 1;
+ *sub_stripes = 0;
if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
- num_stripes = fs_devices->rw_devices;
- min_stripes = 2;
+ *num_stripes = fs_devices->rw_devices;
+ *min_stripes = 2;
}
if (type & (BTRFS_BLOCK_GROUP_DUP)) {
- num_stripes = 2;
- min_stripes = 2;
+ *num_stripes = 2;
+ *min_stripes = 2;
}
if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
if (fs_devices->rw_devices < 2)
return -ENOSPC;
- num_stripes = 2;
- min_stripes = 2;
+ *num_stripes = 2;
+ *min_stripes = 2;
}
if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
- num_stripes = fs_devices->rw_devices;
- if (num_stripes < 4)
+ *num_stripes = fs_devices->rw_devices;
+ if (*num_stripes < 4)
return -ENOSPC;
- num_stripes &= ~(u32)1;
- sub_stripes = 2;
- min_stripes = 4;
+ *num_stripes &= ~(u32)1;
+ *sub_stripes = 2;
+ *min_stripes = 4;
}
+ return 0;
+}
+
+static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices,
+ u64 proposed_size, u64 type,
+ int num_stripes, int small_stripe)
+{
+ int min_stripe_size = 1 * 1024 * 1024;
+ u64 calc_size = proposed_size;
+ u64 max_chunk_size = calc_size;
+ int ncopies = 1;
+
+ if (type & (BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_DUP |
+ BTRFS_BLOCK_GROUP_RAID10))
+ ncopies = 2;
+
if (type & BTRFS_BLOCK_GROUP_DATA) {
max_chunk_size = 10 * calc_size;
min_stripe_size = 64 * 1024 * 1024;
@@ -2230,51 +2344,209 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
max_chunk_size);
-again:
- max_avail = 0;
- if (!map || map->num_stripes != num_stripes) {
- kfree(map);
- map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
- if (!map)
- return -ENOMEM;
- map->num_stripes = num_stripes;
- }
-
- if (calc_size * num_stripes > max_chunk_size) {
- calc_size = max_chunk_size;
+ if (calc_size * num_stripes > max_chunk_size * ncopies) {
+ calc_size = max_chunk_size * ncopies;
do_div(calc_size, num_stripes);
- do_div(calc_size, stripe_len);
- calc_size *= stripe_len;
+ do_div(calc_size, BTRFS_STRIPE_LEN);
+ calc_size *= BTRFS_STRIPE_LEN;
}
/* we don't want tiny stripes */
- if (!looped)
+ if (!small_stripe)
calc_size = max_t(u64, min_stripe_size, calc_size);
/*
- * we're about to do_div by the stripe_len so lets make sure
+ * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure
* we end up with something bigger than a stripe
*/
- calc_size = max_t(u64, calc_size, stripe_len * 4);
+ calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN);
+
+ do_div(calc_size, BTRFS_STRIPE_LEN);
+ calc_size *= BTRFS_STRIPE_LEN;
+
+ return calc_size;
+}
+
+static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map,
+ int num_stripes)
+{
+ struct map_lookup *new;
+ size_t len = map_lookup_size(num_stripes);
+
+ BUG_ON(map->num_stripes < num_stripes);
+
+ if (map->num_stripes == num_stripes)
+ return map;
+
+ new = kmalloc(len, GFP_NOFS);
+ if (!new) {
+ /* just change map->num_stripes */
+ map->num_stripes = num_stripes;
+ return map;
+ }
+
+ memcpy(new, map, len);
+ new->num_stripes = num_stripes;
+ kfree(map);
+ return new;
+}
+
+/*
+ * helper to allocate device space from btrfs_device_info, in which we stored
+ * max free space information of every device. It is used when we can not
+ * allocate chunks by default size.
+ *
+ * By this helper, we can allocate a new chunk as larger as possible.
+ */
+static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_devices *fs_devices,
+ struct btrfs_device_info *devices,
+ int nr_device, u64 type,
+ struct map_lookup **map_lookup,
+ int min_stripes, u64 *stripe_size)
+{
+ int i, index, sort_again = 0;
+ int min_devices = min_stripes;
+ u64 max_avail, min_free;
+ struct map_lookup *map = *map_lookup;
+ int ret;
+
+ if (nr_device < min_stripes)
+ return -ENOSPC;
+
+ btrfs_descending_sort_devices(devices, nr_device);
+
+ max_avail = devices[0].max_avail;
+ if (!max_avail)
+ return -ENOSPC;
+
+ for (i = 0; i < nr_device; i++) {
+ /*
+ * if dev_offset = 0, it means the free space of this device
+ * is less than what we need, and we didn't search max avail
+ * extent on this device, so do it now.
+ */
+ if (!devices[i].dev_offset) {
+ ret = find_free_dev_extent(trans, devices[i].dev,
+ max_avail,
+ &devices[i].dev_offset,
+ &devices[i].max_avail);
+ if (ret != 0 && ret != -ENOSPC)
+ return ret;
+ sort_again = 1;
+ }
+ }
+
+ /* we update the max avail free extent of each devices, sort again */
+ if (sort_again)
+ btrfs_descending_sort_devices(devices, nr_device);
+
+ if (type & BTRFS_BLOCK_GROUP_DUP)
+ min_devices = 1;
+
+ if (!devices[min_devices - 1].max_avail)
+ return -ENOSPC;
+
+ max_avail = devices[min_devices - 1].max_avail;
+ if (type & BTRFS_BLOCK_GROUP_DUP)
+ do_div(max_avail, 2);
+
+ max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type,
+ min_stripes, 1);
+ if (type & BTRFS_BLOCK_GROUP_DUP)
+ min_free = max_avail * 2;
+ else
+ min_free = max_avail;
+
+ if (min_free > devices[min_devices - 1].max_avail)
+ return -ENOSPC;
+
+ map = __shrink_map_lookup_stripes(map, min_stripes);
+ *stripe_size = max_avail;
+
+ index = 0;
+ for (i = 0; i < min_stripes; i++) {
+ map->stripes[i].dev = devices[index].dev;
+ map->stripes[i].physical = devices[index].dev_offset;
+ if (type & BTRFS_BLOCK_GROUP_DUP) {
+ i++;
+ map->stripes[i].dev = devices[index].dev;
+ map->stripes[i].physical = devices[index].dev_offset +
+ max_avail;
+ }
+ index++;
+ }
+ *map_lookup = map;
+
+ return 0;
+}
- do_div(calc_size, stripe_len);
- calc_size *= stripe_len;
+static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
+ struct btrfs_root *extent_root,
+ struct map_lookup **map_ret,
+ u64 *num_bytes, u64 *stripe_size,
+ u64 start, u64 type)
+{
+ struct btrfs_fs_info *info = extent_root->fs_info;
+ struct btrfs_device *device = NULL;
+ struct btrfs_fs_devices *fs_devices = info->fs_devices;
+ struct list_head *cur;
+ struct map_lookup *map;
+ struct extent_map_tree *em_tree;
+ struct extent_map *em;
+ struct btrfs_device_info *devices_info;
+ struct list_head private_devs;
+ u64 calc_size = 1024 * 1024 * 1024;
+ u64 min_free;
+ u64 avail;
+ u64 dev_offset;
+ int num_stripes;
+ int min_stripes;
+ int sub_stripes;
+ int min_devices; /* the min number of devices we need */
+ int i;
+ int ret;
+ int index;
+
+ if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
+ (type & BTRFS_BLOCK_GROUP_DUP)) {
+ WARN_ON(1);
+ type &= ~BTRFS_BLOCK_GROUP_DUP;
+ }
+ if (list_empty(&fs_devices->alloc_list))
+ return -ENOSPC;
+
+ ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes,
+ &min_stripes, &sub_stripes);
+ if (ret)
+ return ret;
+
+ devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
+ GFP_NOFS);
+ if (!devices_info)
+ return -ENOMEM;
+
+ map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
+ if (!map) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ map->num_stripes = num_stripes;
cur = fs_devices->alloc_list.next;
index = 0;
+ i = 0;
- if (type & BTRFS_BLOCK_GROUP_DUP)
+ calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type,
+ num_stripes, 0);
+
+ if (type & BTRFS_BLOCK_GROUP_DUP) {
min_free = calc_size * 2;
- else
+ min_devices = 1;
+ } else {
min_free = calc_size;
-
- /*
- * we add 1MB because we never use the first 1MB of the device, unless
- * we've looped, then we are likely allocating the maximum amount of
- * space left already
- */
- if (!looped)
- min_free += 1024 * 1024;
+ min_devices = min_stripes;
+ }
INIT_LIST_HEAD(&private_devs);
while (index < num_stripes) {
@@ -2287,27 +2559,39 @@ again:
cur = cur->next;
if (device->in_fs_metadata && avail >= min_free) {
- ret = find_free_dev_extent(trans, device,
- min_free, &dev_offset,
- &max_avail);
+ ret = find_free_dev_extent(trans, device, min_free,
+ &devices_info[i].dev_offset,
+ &devices_info[i].max_avail);
if (ret == 0) {
list_move_tail(&device->dev_alloc_list,
&private_devs);
map->stripes[index].dev = device;
- map->stripes[index].physical = dev_offset;
+ map->stripes[index].physical =
+ devices_info[i].dev_offset;
index++;
if (type & BTRFS_BLOCK_GROUP_DUP) {
map->stripes[index].dev = device;
map->stripes[index].physical =
- dev_offset + calc_size;
+ devices_info[i].dev_offset +
+ calc_size;
index++;
}
- }
- } else if (device->in_fs_metadata && avail > max_avail)
- max_avail = avail;
+ } else if (ret != -ENOSPC)
+ goto error;
+
+ devices_info[i].dev = device;
+ i++;
+ } else if (device->in_fs_metadata &&
+ avail >= BTRFS_STRIPE_LEN) {
+ devices_info[i].dev = device;
+ devices_info[i].max_avail = avail;
+ i++;
+ }
+
if (cur == &fs_devices->alloc_list)
break;
}
+
list_splice(&private_devs, &fs_devices->alloc_list);
if (index < num_stripes) {
if (index >= min_stripes) {
@@ -2316,34 +2600,36 @@ again:
num_stripes /= sub_stripes;
num_stripes *= sub_stripes;
}
- looped = 1;
- goto again;
- }
- if (!looped && max_avail > 0) {
- looped = 1;
- calc_size = max_avail;
- goto again;
+
+ map = __shrink_map_lookup_stripes(map, num_stripes);
+ } else if (i >= min_devices) {
+ ret = __btrfs_alloc_tiny_space(trans, fs_devices,
+ devices_info, i, type,
+ &map, min_stripes,
+ &calc_size);
+ if (ret)
+ goto error;
+ } else {
+ ret = -ENOSPC;
+ goto error;
}
- kfree(map);
- return -ENOSPC;
}
map->sector_size = extent_root->sectorsize;
- map->stripe_len = stripe_len;
- map->io_align = stripe_len;
- map->io_width = stripe_len;
+ map->stripe_len = BTRFS_STRIPE_LEN;
+ map->io_align = BTRFS_STRIPE_LEN;
+ map->io_width = BTRFS_STRIPE_LEN;
map->type = type;
- map->num_stripes = num_stripes;
map->sub_stripes = sub_stripes;
*map_ret = map;
*stripe_size = calc_size;
*num_bytes = chunk_bytes_by_type(type, calc_size,
- num_stripes, sub_stripes);
+ map->num_stripes, sub_stripes);
em = alloc_extent_map(GFP_NOFS);
if (!em) {
- kfree(map);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto error;
}
em->bdev = (struct block_device *)map;
em->start = start;
@@ -2376,7 +2662,13 @@ again:
index++;
}
+ kfree(devices_info);
return 0;
+
+error:
+ kfree(map);
+ kfree(devices_info);
+ return ret;
}
static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 1be781079450..7fb59d45fe8c 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -20,8 +20,11 @@
#define __BTRFS_VOLUMES_
#include <linux/bio.h>
+#include <linux/sort.h>
#include "async-thread.h"
+#define BTRFS_STRIPE_LEN (64 * 1024)
+
struct buffer_head;
struct btrfs_pending_bios {
struct bio *head;
@@ -136,6 +139,30 @@ struct btrfs_multi_bio {
struct btrfs_bio_stripe stripes[];
};
+struct btrfs_device_info {
+ struct btrfs_device *dev;
+ u64 dev_offset;
+ u64 max_avail;
+};
+
+/* Used to sort the devices by max_avail(descending sort) */
+int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2);
+
+/*
+ * sort the devices by max_avail, in which max free extent size of each device
+ * is stored.(Descending Sort)
+ */
+static inline void btrfs_descending_sort_devices(
+ struct btrfs_device_info *devices,
+ size_t nr_devices)
+{
+ sort(devices, nr_devices, sizeof(struct btrfs_device_info),
+ btrfs_cmp_device_free_bytes, NULL);
+}
+
+int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
+ u64 end, u64 *length);
+
#define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \
(sizeof(struct btrfs_bio_stripe) * (n)))
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 698fdd2c739c..a5776531dc2b 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -316,6 +316,15 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
size_t size, int flags)
{
+ struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
+
+ /*
+ * The permission on security.* and system.* is not checked
+ * in permission().
+ */
+ if (btrfs_root_readonly(root))
+ return -EROFS;
+
/*
* If this is a request for a synthetic attribute in the system.*
* namespace use the generic infrastructure to resolve a handler
@@ -336,6 +345,15 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
int btrfs_removexattr(struct dentry *dentry, const char *name)
{
+ struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
+
+ /*
+ * The permission on security.* and system.* is not checked
+ * in permission().
+ */
+ if (btrfs_root_readonly(root))
+ return -EROFS;
+
/*
* If this is a request for a synthetic attribute in the system.*
* namespace use the generic infrastructure to resolve a handler
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index b9cd5445f71c..f5ec2d44150d 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -32,15 +32,6 @@
#include <linux/bio.h>
#include "compression.h"
-/* Plan: call deflate() with avail_in == *sourcelen,
- avail_out = *dstlen - 12 and flush == Z_FINISH.
- If it doesn't manage to finish, call it again with
- avail_in == 0 and avail_out set to the remaining 12
- bytes for it to clean up.
- Q: Is 12 bytes sufficient?
-*/
-#define STREAM_END_SPACE 12
-
struct workspace {
z_stream inf_strm;
z_stream def_strm;
@@ -48,152 +39,51 @@ struct workspace {
struct list_head list;
};
-static LIST_HEAD(idle_workspace);
-static DEFINE_SPINLOCK(workspace_lock);
-static unsigned long num_workspace;
-static atomic_t alloc_workspace = ATOMIC_INIT(0);
-static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
+static void zlib_free_workspace(struct list_head *ws)
+{
+ struct workspace *workspace = list_entry(ws, struct workspace, list);
-/*
- * this finds an available zlib workspace or allocates a new one
- * NULL or an ERR_PTR is returned if things go bad.
- */
-static struct workspace *find_zlib_workspace(void)
+ vfree(workspace->def_strm.workspace);
+ vfree(workspace->inf_strm.workspace);
+ kfree(workspace->buf);
+ kfree(workspace);
+}
+
+static struct list_head *zlib_alloc_workspace(void)
{
struct workspace *workspace;
- int ret;
- int cpus = num_online_cpus();
-
-again:
- spin_lock(&workspace_lock);
- if (!list_empty(&idle_workspace)) {
- workspace = list_entry(idle_workspace.next, struct workspace,
- list);
- list_del(&workspace->list);
- num_workspace--;
- spin_unlock(&workspace_lock);
- return workspace;
- }
- spin_unlock(&workspace_lock);
- if (atomic_read(&alloc_workspace) > cpus) {
- DEFINE_WAIT(wait);
- prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
- if (atomic_read(&alloc_workspace) > cpus)
- schedule();
- finish_wait(&workspace_wait, &wait);
- goto again;
- }
- atomic_inc(&alloc_workspace);
workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
- if (!workspace) {
- ret = -ENOMEM;
- goto fail;
- }
+ if (!workspace)
+ return ERR_PTR(-ENOMEM);
workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
- if (!workspace->def_strm.workspace) {
- ret = -ENOMEM;
- goto fail;
- }
workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
- if (!workspace->inf_strm.workspace) {
- ret = -ENOMEM;
- goto fail_inflate;
- }
workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
- if (!workspace->buf) {
- ret = -ENOMEM;
- goto fail_kmalloc;
- }
- return workspace;
-
-fail_kmalloc:
- vfree(workspace->inf_strm.workspace);
-fail_inflate:
- vfree(workspace->def_strm.workspace);
-fail:
- kfree(workspace);
- atomic_dec(&alloc_workspace);
- wake_up(&workspace_wait);
- return ERR_PTR(ret);
-}
-
-/*
- * put a workspace struct back on the list or free it if we have enough
- * idle ones sitting around
- */
-static int free_workspace(struct workspace *workspace)
-{
- spin_lock(&workspace_lock);
- if (num_workspace < num_online_cpus()) {
- list_add_tail(&workspace->list, &idle_workspace);
- num_workspace++;
- spin_unlock(&workspace_lock);
- if (waitqueue_active(&workspace_wait))
- wake_up(&workspace_wait);
- return 0;
- }
- spin_unlock(&workspace_lock);
- vfree(workspace->def_strm.workspace);
- vfree(workspace->inf_strm.workspace);
- kfree(workspace->buf);
- kfree(workspace);
+ if (!workspace->def_strm.workspace ||
+ !workspace->inf_strm.workspace || !workspace->buf)
+ goto fail;
- atomic_dec(&alloc_workspace);
- if (waitqueue_active(&workspace_wait))
- wake_up(&workspace_wait);
- return 0;
-}
+ INIT_LIST_HEAD(&workspace->list);
-/*
- * cleanup function for module exit
- */
-static void free_workspaces(void)
-{
- struct workspace *workspace;
- while (!list_empty(&idle_workspace)) {
- workspace = list_entry(idle_workspace.next, struct workspace,
- list);
- list_del(&workspace->list);
- vfree(workspace->def_strm.workspace);
- vfree(workspace->inf_strm.workspace);
- kfree(workspace->buf);
- kfree(workspace);
- atomic_dec(&alloc_workspace);
- }
+ return &workspace->list;
+fail:
+ zlib_free_workspace(&workspace->list);
+ return ERR_PTR(-ENOMEM);
}
-/*
- * given an address space and start/len, compress the bytes.
- *
- * pages are allocated to hold the compressed result and stored
- * in 'pages'
- *
- * out_pages is used to return the number of pages allocated. There
- * may be pages allocated even if we return an error
- *
- * total_in is used to return the number of bytes actually read. It
- * may be smaller then len if we had to exit early because we
- * ran out of room in the pages array or because we cross the
- * max_out threshold.
- *
- * total_out is used to return the total number of compressed bytes
- *
- * max_out tells us the max number of bytes that we're allowed to
- * stuff into pages
- */
-int btrfs_zlib_compress_pages(struct address_space *mapping,
- u64 start, unsigned long len,
- struct page **pages,
- unsigned long nr_dest_pages,
- unsigned long *out_pages,
- unsigned long *total_in,
- unsigned long *total_out,
- unsigned long max_out)
+static int zlib_compress_pages(struct list_head *ws,
+ struct address_space *mapping,
+ u64 start, unsigned long len,
+ struct page **pages,
+ unsigned long nr_dest_pages,
+ unsigned long *out_pages,
+ unsigned long *total_in,
+ unsigned long *total_out,
+ unsigned long max_out)
{
+ struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret;
- struct workspace *workspace;
char *data_in;
char *cpage_out;
int nr_pages = 0;
@@ -205,10 +95,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
*total_out = 0;
*total_in = 0;
- workspace = find_zlib_workspace();
- if (IS_ERR(workspace))
- return -1;
-
if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
printk(KERN_WARNING "deflateInit failed\n");
ret = -1;
@@ -222,6 +108,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
data_in = kmap(in_page);
out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ if (out_page == NULL) {
+ ret = -1;
+ goto out;
+ }
cpage_out = kmap(out_page);
pages[0] = out_page;
nr_pages = 1;
@@ -260,6 +150,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
goto out;
}
out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ if (out_page == NULL) {
+ ret = -1;
+ goto out;
+ }
cpage_out = kmap(out_page);
pages[nr_pages] = out_page;
nr_pages++;
@@ -314,55 +208,26 @@ out:
kunmap(in_page);
page_cache_release(in_page);
}
- free_workspace(workspace);
return ret;
}
-/*
- * pages_in is an array of pages with compressed data.
- *
- * disk_start is the starting logical offset of this array in the file
- *
- * bvec is a bio_vec of pages from the file that we want to decompress into
- *
- * vcnt is the count of pages in the biovec
- *
- * srclen is the number of bytes in pages_in
- *
- * The basic idea is that we have a bio that was created by readpages.
- * The pages in the bio are for the uncompressed data, and they may not
- * be contiguous. They all correspond to the range of bytes covered by
- * the compressed extent.
- */
-int btrfs_zlib_decompress_biovec(struct page **pages_in,
- u64 disk_start,
- struct bio_vec *bvec,
- int vcnt,
- size_t srclen)
+static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in,
+ u64 disk_start,
+ struct bio_vec *bvec,
+ int vcnt,
+ size_t srclen)
{
- int ret = 0;
+ struct workspace *workspace = list_entry(ws, struct workspace, list);
+ int ret = 0, ret2;
int wbits = MAX_WBITS;
- struct workspace *workspace;
char *data_in;
size_t total_out = 0;
- unsigned long page_bytes_left;
unsigned long page_in_index = 0;
unsigned long page_out_index = 0;
- struct page *page_out;
unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
PAGE_CACHE_SIZE;
unsigned long buf_start;
- unsigned long buf_offset;
- unsigned long bytes;
- unsigned long working_bytes;
unsigned long pg_offset;
- unsigned long start_byte;
- unsigned long current_buf_start;
- char *kaddr;
-
- workspace = find_zlib_workspace();
- if (IS_ERR(workspace))
- return -ENOMEM;
data_in = kmap(pages_in[page_in_index]);
workspace->inf_strm.next_in = data_in;
@@ -372,8 +237,6 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in,
workspace->inf_strm.total_out = 0;
workspace->inf_strm.next_out = workspace->buf;
workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
- page_out = bvec[page_out_index].bv_page;
- page_bytes_left = PAGE_CACHE_SIZE;
pg_offset = 0;
/* If it's deflate, and it's got no preset dictionary, then
@@ -389,107 +252,29 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in,
if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
printk(KERN_WARNING "inflateInit failed\n");
- ret = -1;
- goto out;
+ return -1;
}
while (workspace->inf_strm.total_in < srclen) {
ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
if (ret != Z_OK && ret != Z_STREAM_END)
break;
- /*
- * buf start is the byte offset we're of the start of
- * our workspace buffer
- */
- buf_start = total_out;
- /* total_out is the last byte of the workspace buffer */
+ buf_start = total_out;
total_out = workspace->inf_strm.total_out;
- working_bytes = total_out - buf_start;
-
- /*
- * start byte is the first byte of the page we're currently
- * copying into relative to the start of the compressed data.
- */
- start_byte = page_offset(page_out) - disk_start;
-
- if (working_bytes == 0) {
- /* we didn't make progress in this inflate
- * call, we're done
- */
- if (ret != Z_STREAM_END)
- ret = -1;
+ /* we didn't make progress in this inflate call, we're done */
+ if (buf_start == total_out)
break;
- }
- /* we haven't yet hit data corresponding to this page */
- if (total_out <= start_byte)
- goto next;
-
- /*
- * the start of the data we care about is offset into
- * the middle of our working buffer
- */
- if (total_out > start_byte && buf_start < start_byte) {
- buf_offset = start_byte - buf_start;
- working_bytes -= buf_offset;
- } else {
- buf_offset = 0;
- }
- current_buf_start = buf_start;
-
- /* copy bytes from the working buffer into the pages */
- while (working_bytes > 0) {
- bytes = min(PAGE_CACHE_SIZE - pg_offset,
- PAGE_CACHE_SIZE - buf_offset);
- bytes = min(bytes, working_bytes);
- kaddr = kmap_atomic(page_out, KM_USER0);
- memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
- bytes);
- kunmap_atomic(kaddr, KM_USER0);
- flush_dcache_page(page_out);
-
- pg_offset += bytes;
- page_bytes_left -= bytes;
- buf_offset += bytes;
- working_bytes -= bytes;
- current_buf_start += bytes;
-
- /* check if we need to pick another page */
- if (page_bytes_left == 0) {
- page_out_index++;
- if (page_out_index >= vcnt) {
- ret = 0;
- goto done;
- }
-
- page_out = bvec[page_out_index].bv_page;
- pg_offset = 0;
- page_bytes_left = PAGE_CACHE_SIZE;
- start_byte = page_offset(page_out) - disk_start;
-
- /*
- * make sure our new page is covered by this
- * working buffer
- */
- if (total_out <= start_byte)
- goto next;
-
- /* the next page in the biovec might not
- * be adjacent to the last page, but it
- * might still be found inside this working
- * buffer. bump our offset pointer
- */
- if (total_out > start_byte &&
- current_buf_start < start_byte) {
- buf_offset = start_byte - buf_start;
- working_bytes = total_out - start_byte;
- current_buf_start = buf_start +
- buf_offset;
- }
- }
+ ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start,
+ total_out, disk_start,
+ bvec, vcnt,
+ &page_out_index, &pg_offset);
+ if (ret2 == 0) {
+ ret = 0;
+ goto done;
}
-next:
+
workspace->inf_strm.next_out = workspace->buf;
workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
@@ -516,35 +301,21 @@ done:
zlib_inflateEnd(&workspace->inf_strm);
if (data_in)
kunmap(pages_in[page_in_index]);
-out:
- free_workspace(workspace);
return ret;
}
-/*
- * a less complex decompression routine. Our compressed data fits in a
- * single page, and we want to read a single page out of it.
- * start_byte tells us the offset into the compressed data we're interested in
- */
-int btrfs_zlib_decompress(unsigned char *data_in,
- struct page *dest_page,
- unsigned long start_byte,
- size_t srclen, size_t destlen)
+static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
+ struct page *dest_page,
+ unsigned long start_byte,
+ size_t srclen, size_t destlen)
{
+ struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret = 0;
int wbits = MAX_WBITS;
- struct workspace *workspace;
unsigned long bytes_left = destlen;
unsigned long total_out = 0;
char *kaddr;
- if (destlen > PAGE_CACHE_SIZE)
- return -ENOMEM;
-
- workspace = find_zlib_workspace();
- if (IS_ERR(workspace))
- return -ENOMEM;
-
workspace->inf_strm.next_in = data_in;
workspace->inf_strm.avail_in = srclen;
workspace->inf_strm.total_in = 0;
@@ -565,8 +336,7 @@ int btrfs_zlib_decompress(unsigned char *data_in,
if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
printk(KERN_WARNING "inflateInit failed\n");
- ret = -1;
- goto out;
+ return -1;
}
while (bytes_left > 0) {
@@ -616,12 +386,13 @@ next:
ret = 0;
zlib_inflateEnd(&workspace->inf_strm);
-out:
- free_workspace(workspace);
return ret;
}
-void btrfs_zlib_exit(void)
-{
- free_workspaces();
-}
+struct btrfs_compress_op btrfs_zlib_compress = {
+ .alloc_workspace = zlib_alloc_workspace,
+ .free_workspace = zlib_free_workspace,
+ .compress_pages = zlib_compress_pages,
+ .decompress_biovec = zlib_decompress_biovec,
+ .decompress = zlib_decompress,
+};
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a65d311d163a..9f59887badd2 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1113,6 +1113,8 @@ cifs_parse_mount_options(char *options, const char *devname,
} else if (!strnicmp(data, "uid", 3) && value && *value) {
vol->linux_uid = simple_strtoul(value, &value, 0);
uid_specified = true;
+ } else if (!strnicmp(data, "cruid", 5) && value && *value) {
+ vol->cred_uid = simple_strtoul(value, &value, 0);
} else if (!strnicmp(data, "forceuid", 8)) {
override_uid = 1;
} else if (!strnicmp(data, "noforceuid", 10)) {
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 9aad47a2d62f..6783ce6cdc89 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -899,8 +899,8 @@ map_smb_to_linux_error(struct smb_hdr *smb, int logErr)
}
/* else ERRHRD class errors or junk - return EIO */
- cFYI(1, "Mapping smb error code %d to POSIX err %d",
- smberrcode, rc);
+ cFYI(1, "Mapping smb error code 0x%x to POSIX err %d",
+ le32_to_cpu(smb->Status.CifsError), rc);
/* generic corrective action e.g. reconnect SMB session on
* ERRbaduid could be added */
diff --git a/fs/compat.c b/fs/compat.c
index eb1740ac8c0a..f6fd0a00e6cc 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -257,7 +257,7 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
}
/*
- * The following statfs calls are copies of code from fs/open.c and
+ * The following statfs calls are copies of code from fs/statfs.c and
* should be checked against those from time to time
*/
asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
@@ -320,7 +320,9 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
__put_user(kbuf->f_namelen, &ubuf->f_namelen) ||
__put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) ||
__put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) ||
- __put_user(kbuf->f_frsize, &ubuf->f_frsize))
+ __put_user(kbuf->f_frsize, &ubuf->f_frsize) ||
+ __put_user(kbuf->f_flags, &ubuf->f_flags) ||
+ __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare)))
return -EFAULT;
return 0;
}
@@ -597,10 +599,8 @@ ssize_t compat_rw_copy_check_uvector(int type,
if (nr_segs > fast_segs) {
ret = -ENOMEM;
iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
- if (iov == NULL) {
- *ret_pointer = fast_pointer;
+ if (iov == NULL)
goto out;
- }
}
*ret_pointer = iov;
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index cbadc1bee6e7..bfd8b680e648 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -348,7 +348,7 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
BUG_ON(!crypt_stat || !crypt_stat->tfm
|| !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED));
if (unlikely(ecryptfs_verbosity > 0)) {
- ecryptfs_printk(KERN_DEBUG, "Key size [%d]; key:\n",
+ ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n",
crypt_stat->key_size);
ecryptfs_dump_hex(crypt_stat->key,
crypt_stat->key_size);
@@ -413,10 +413,9 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page,
rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
(extent_base + extent_offset));
if (rc) {
- ecryptfs_printk(KERN_ERR, "Error attempting to "
- "derive IV for extent [0x%.16x]; "
- "rc = [%d]\n", (extent_base + extent_offset),
- rc);
+ ecryptfs_printk(KERN_ERR, "Error attempting to derive IV for "
+ "extent [0x%.16llx]; rc = [%d]\n",
+ (unsigned long long)(extent_base + extent_offset), rc);
goto out;
}
if (unlikely(ecryptfs_verbosity > 0)) {
@@ -443,9 +442,9 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page,
}
rc = 0;
if (unlikely(ecryptfs_verbosity > 0)) {
- ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16x]; "
- "rc = [%d]\n", (extent_base + extent_offset),
- rc);
+ ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16llx]; "
+ "rc = [%d]\n",
+ (unsigned long long)(extent_base + extent_offset), rc);
ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
"encryption:\n");
ecryptfs_dump_hex((char *)(page_address(enc_extent_page)), 8);
@@ -540,10 +539,9 @@ static int ecryptfs_decrypt_extent(struct page *page,
rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
(extent_base + extent_offset));
if (rc) {
- ecryptfs_printk(KERN_ERR, "Error attempting to "
- "derive IV for extent [0x%.16x]; "
- "rc = [%d]\n", (extent_base + extent_offset),
- rc);
+ ecryptfs_printk(KERN_ERR, "Error attempting to derive IV for "
+ "extent [0x%.16llx]; rc = [%d]\n",
+ (unsigned long long)(extent_base + extent_offset), rc);
goto out;
}
if (unlikely(ecryptfs_verbosity > 0)) {
@@ -571,9 +569,9 @@ static int ecryptfs_decrypt_extent(struct page *page,
}
rc = 0;
if (unlikely(ecryptfs_verbosity > 0)) {
- ecryptfs_printk(KERN_DEBUG, "Decrypt extent [0x%.16x]; "
- "rc = [%d]\n", (extent_base + extent_offset),
- rc);
+ ecryptfs_printk(KERN_DEBUG, "Decrypt extent [0x%.16llx]; "
+ "rc = [%d]\n",
+ (unsigned long long)(extent_base + extent_offset), rc);
ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
"decryption:\n");
ecryptfs_dump_hex((char *)(page_address(page)
@@ -780,7 +778,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
}
ecryptfs_printk(KERN_DEBUG,
"Initializing cipher [%s]; strlen = [%d]; "
- "key_size_bits = [%d]\n",
+ "key_size_bits = [%zd]\n",
crypt_stat->cipher, (int)strlen(crypt_stat->cipher),
crypt_stat->key_size << 3);
if (crypt_stat->tfm) {
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 413a3c48f0bb..dbc84ed96336 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -192,7 +192,6 @@ ecryptfs_get_key_payload_data(struct key *key)
(((struct user_key_payload*)key->payload.data)->data);
}
-#define ECRYPTFS_SUPER_MAGIC 0xf15f
#define ECRYPTFS_MAX_KEYSET_SIZE 1024
#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32
#define ECRYPTFS_MAX_NUM_ENC_KEYS 64
@@ -584,6 +583,7 @@ ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
#define ecryptfs_printk(type, fmt, arg...) \
__ecryptfs_printk(type "%s: " fmt, __func__, ## arg);
+__attribute__ ((format(printf, 1, 2)))
void __ecryptfs_printk(const char *fmt, ...);
extern const struct file_operations ecryptfs_main_fops;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 91da02987bff..81e10e6a9443 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -47,7 +47,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
- int rc;
+ ssize_t rc;
struct dentry *lower_dentry;
struct vfsmount *lower_vfsmount;
struct file *file = iocb->ki_filp;
@@ -191,18 +191,16 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
| ECRYPTFS_ENCRYPTED);
}
mutex_unlock(&crypt_stat->cs_mutex);
- if (!ecryptfs_inode_to_private(inode)->lower_file) {
- rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
- if (rc) {
- printk(KERN_ERR "%s: Error attempting to initialize "
- "the persistent file for the dentry with name "
- "[%s]; rc = [%d]\n", __func__,
- ecryptfs_dentry->d_name.name, rc);
- goto out_free;
- }
+ rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
+ if (rc) {
+ printk(KERN_ERR "%s: Error attempting to initialize "
+ "the persistent file for the dentry with name "
+ "[%s]; rc = [%d]\n", __func__,
+ ecryptfs_dentry->d_name.name, rc);
+ goto out_free;
}
- if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
- && !(file->f_flags & O_RDONLY)) {
+ if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_ACCMODE)
+ == O_RDONLY && (file->f_flags & O_ACCMODE) != O_RDONLY) {
rc = -EPERM;
printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
"file must hence be opened RO\n", __func__);
@@ -243,9 +241,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
}
}
mutex_unlock(&crypt_stat->cs_mutex);
- ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = [0x%.16x] "
- "size: [0x%.16x]\n", inode, inode->i_ino,
- i_size_read(inode));
+ ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = "
+ "[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino,
+ (unsigned long long)i_size_read(inode));
goto out;
out_free:
kmem_cache_free(ecryptfs_file_info_cache,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 64ff02330752..bd33f87a1907 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -185,15 +185,13 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
"context; rc = [%d]\n", rc);
goto out;
}
- if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) {
- rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
- if (rc) {
- printk(KERN_ERR "%s: Error attempting to initialize "
- "the persistent file for the dentry with name "
- "[%s]; rc = [%d]\n", __func__,
- ecryptfs_dentry->d_name.name, rc);
- goto out;
- }
+ rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
+ if (rc) {
+ printk(KERN_ERR "%s: Error attempting to initialize "
+ "the persistent file for the dentry with name "
+ "[%s]; rc = [%d]\n", __func__,
+ ecryptfs_dentry->d_name.name, rc);
+ goto out;
}
rc = ecryptfs_write_metadata(ecryptfs_dentry);
if (rc) {
@@ -302,15 +300,13 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
rc = -ENOMEM;
goto out;
}
- if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) {
- rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
- if (rc) {
- printk(KERN_ERR "%s: Error attempting to initialize "
- "the persistent file for the dentry with name "
- "[%s]; rc = [%d]\n", __func__,
- ecryptfs_dentry->d_name.name, rc);
- goto out_free_kmem;
- }
+ rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
+ if (rc) {
+ printk(KERN_ERR "%s: Error attempting to initialize "
+ "the persistent file for the dentry with name "
+ "[%s]; rc = [%d]\n", __func__,
+ ecryptfs_dentry->d_name.name, rc);
+ goto out_free_kmem;
}
crypt_stat = &ecryptfs_inode_to_private(
ecryptfs_dentry->d_inode)->crypt_stat;
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index b1f6858a5223..c1436cff6f2d 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -59,7 +59,7 @@ static int process_request_key_err(long err_code)
break;
default:
ecryptfs_printk(KERN_WARNING, "Unknown error code: "
- "[0x%.16x]\n", err_code);
+ "[0x%.16lx]\n", err_code);
rc = -EINVAL;
}
return rc;
@@ -130,7 +130,7 @@ int ecryptfs_write_packet_length(char *dest, size_t size,
} else {
rc = -EINVAL;
ecryptfs_printk(KERN_WARNING,
- "Unsupported packet size: [%d]\n", size);
+ "Unsupported packet size: [%zd]\n", size);
}
return rc;
}
@@ -1672,7 +1672,7 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
auth_tok->session_key.decrypted_key_size);
crypt_stat->flags |= ECRYPTFS_KEY_VALID;
if (unlikely(ecryptfs_verbosity > 0)) {
- ecryptfs_printk(KERN_DEBUG, "FEK of size [%d]:\n",
+ ecryptfs_printk(KERN_DEBUG, "FEK of size [%zd]:\n",
crypt_stat->key_size);
ecryptfs_dump_hex(crypt_stat->key,
crypt_stat->key_size);
@@ -1754,7 +1754,7 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
if (ECRYPTFS_SIG_SIZE != tag_11_contents_size) {
ecryptfs_printk(KERN_ERR, "Expected "
"signature of size [%d]; "
- "read size [%d]\n",
+ "read size [%zd]\n",
ECRYPTFS_SIG_SIZE,
tag_11_contents_size);
rc = -EIO;
@@ -1787,8 +1787,8 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
goto out_wipe_list;
break;
default:
- ecryptfs_printk(KERN_DEBUG, "No packet at offset "
- "[%d] of the file header; hex value of "
+ ecryptfs_printk(KERN_DEBUG, "No packet at offset [%zd] "
+ "of the file header; hex value of "
"character is [0x%.2x]\n", i, src[i]);
next_packet_is_auth_tok_packet = 0;
}
@@ -1864,8 +1864,8 @@ found_matching_auth_tok:
"session key for authentication token with sig "
"[%.*s]; rc = [%d]. Removing auth tok "
"candidate from the list and searching for "
- "the next match.\n", candidate_auth_tok_sig,
- ECRYPTFS_SIG_SIZE_HEX, rc);
+ "the next match.\n", ECRYPTFS_SIG_SIZE_HEX,
+ candidate_auth_tok_sig, rc);
list_for_each_entry_safe(auth_tok_list_item,
auth_tok_list_item_tmp,
&auth_tok_list, list) {
@@ -2168,7 +2168,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
if (encrypted_session_key_valid) {
ecryptfs_printk(KERN_DEBUG, "encrypted_session_key_valid != 0; "
"using auth_tok->session_key.encrypted_key, "
- "where key_rec->enc_key_size = [%d]\n",
+ "where key_rec->enc_key_size = [%zd]\n",
key_rec->enc_key_size);
memcpy(key_rec->enc_key,
auth_tok->session_key.encrypted_key,
@@ -2198,7 +2198,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
if (rc < 1 || rc > 2) {
ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
"for crypt_stat session key; expected rc = 1; "
- "got rc = [%d]. key_rec->enc_key_size = [%d]\n",
+ "got rc = [%d]. key_rec->enc_key_size = [%zd]\n",
rc, key_rec->enc_key_size);
rc = -ENOMEM;
goto out;
@@ -2209,7 +2209,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
"for crypt_stat encrypted session key; "
"expected rc = 1; got rc = [%d]. "
- "key_rec->enc_key_size = [%d]\n", rc,
+ "key_rec->enc_key_size = [%zd]\n", rc,
key_rec->enc_key_size);
rc = -ENOMEM;
goto out;
@@ -2224,7 +2224,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
goto out;
}
rc = 0;
- ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n",
+ ecryptfs_printk(KERN_DEBUG, "Encrypting [%zd] bytes of the key\n",
crypt_stat->key_size);
rc = crypto_blkcipher_encrypt(&desc, dst_sg, src_sg,
(*key_rec).enc_key_size);
@@ -2235,7 +2235,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
}
ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n");
if (ecryptfs_verbosity > 0) {
- ecryptfs_printk(KERN_DEBUG, "EFEK of size [%d]:\n",
+ ecryptfs_printk(KERN_DEBUG, "EFEK of size [%zd]:\n",
key_rec->enc_key_size);
ecryptfs_dump_hex(key_rec->enc_key,
key_rec->enc_key_size);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d3b28abdd6aa..758323a0f09a 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -36,6 +36,7 @@
#include <linux/parser.h>
#include <linux/fs_stack.h>
#include <linux/slab.h>
+#include <linux/magic.h>
#include "ecryptfs_kernel.h"
/**
@@ -564,6 +565,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
ecryptfs_set_superblock_lower(s, path.dentry->d_sb);
s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
s->s_blocksize = path.dentry->d_sb->s_blocksize;
+ s->s_magic = ECRYPTFS_SUPER_MAGIC;
inode = ecryptfs_get_inode(path.dentry->d_inode, s);
rc = PTR_ERR(inode);
@@ -808,9 +810,10 @@ static int __init ecryptfs_init(void)
ecryptfs_printk(KERN_ERR, "The eCryptfs extent size is "
"larger than the host's page size, and so "
"eCryptfs cannot run on this system. The "
- "default eCryptfs extent size is [%d] bytes; "
- "the page size is [%d] bytes.\n",
- ECRYPTFS_DEFAULT_EXTENT_SIZE, PAGE_CACHE_SIZE);
+ "default eCryptfs extent size is [%u] bytes; "
+ "the page size is [%lu] bytes.\n",
+ ECRYPTFS_DEFAULT_EXTENT_SIZE,
+ (unsigned long)PAGE_CACHE_SIZE);
goto out;
}
rc = ecryptfs_init_kmem_caches();
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index b1d82756544b..cc64fca89f8d 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -65,7 +65,7 @@ static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc)
rc = ecryptfs_encrypt_page(page);
if (rc) {
ecryptfs_printk(KERN_WARNING, "Error encrypting "
- "page (upper index [0x%.16x])\n", page->index);
+ "page (upper index [0x%.16lx])\n", page->index);
ClearPageUptodate(page);
goto out;
}
@@ -237,7 +237,7 @@ out:
ClearPageUptodate(page);
else
SetPageUptodate(page);
- ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n",
+ ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16lx]\n",
page->index);
unlock_page(page);
return rc;
@@ -290,6 +290,7 @@ static int ecryptfs_write_begin(struct file *file,
return -ENOMEM;
*pagep = page;
+ prev_page_end_size = ((loff_t)index << PAGE_CACHE_SHIFT);
if (!PageUptodate(page)) {
struct ecryptfs_crypt_stat *crypt_stat =
&ecryptfs_inode_to_private(mapping->host)->crypt_stat;
@@ -335,18 +336,23 @@ static int ecryptfs_write_begin(struct file *file,
SetPageUptodate(page);
}
} else {
- rc = ecryptfs_decrypt_page(page);
- if (rc) {
- printk(KERN_ERR "%s: Error decrypting page "
- "at index [%ld]; rc = [%d]\n",
- __func__, page->index, rc);
- ClearPageUptodate(page);
- goto out;
+ if (prev_page_end_size
+ >= i_size_read(page->mapping->host)) {
+ zero_user(page, 0, PAGE_CACHE_SIZE);
+ } else {
+ rc = ecryptfs_decrypt_page(page);
+ if (rc) {
+ printk(KERN_ERR "%s: Error decrypting "
+ "page at index [%ld]; "
+ "rc = [%d]\n",
+ __func__, page->index, rc);
+ ClearPageUptodate(page);
+ goto out;
+ }
}
SetPageUptodate(page);
}
}
- prev_page_end_size = ((loff_t)index << PAGE_CACHE_SHIFT);
/* If creating a page or more of holes, zero them out via truncate.
* Note, this will increase i_size. */
if (index != 0) {
@@ -488,7 +494,7 @@ static int ecryptfs_write_end(struct file *file,
} else
ecryptfs_printk(KERN_DEBUG, "Not a new file\n");
ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
- "(page w/ index = [0x%.16x], to = [%d])\n", index, to);
+ "(page w/ index = [0x%.16lx], to = [%d])\n", index, to);
if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page, 0,
to);
@@ -503,19 +509,20 @@ static int ecryptfs_write_end(struct file *file,
rc = fill_zeros_to_end_of_page(page, to);
if (rc) {
ecryptfs_printk(KERN_WARNING, "Error attempting to fill "
- "zeros in page with index = [0x%.16x]\n", index);
+ "zeros in page with index = [0x%.16lx]\n", index);
goto out;
}
rc = ecryptfs_encrypt_page(page);
if (rc) {
ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper "
- "index [0x%.16x])\n", index);
+ "index [0x%.16lx])\n", index);
goto out;
}
if (pos + copied > i_size_read(ecryptfs_inode)) {
i_size_write(ecryptfs_inode, pos + copied);
ecryptfs_printk(KERN_DEBUG, "Expanded file size to "
- "[0x%.16x]\n", i_size_read(ecryptfs_inode));
+ "[0x%.16llx]\n",
+ (unsigned long long)i_size_read(ecryptfs_inode));
}
rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
if (rc)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1de65f572033..0c8d97b56f34 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2065,7 +2065,7 @@ extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
extern void ext4_ext_truncate(struct inode *);
extern void ext4_ext_init(struct super_block *);
extern void ext4_ext_release(struct super_block *);
-extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
+extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
loff_t len);
extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
ssize_t len);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c4068f6abf03..63a75810b7c3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3627,14 +3627,15 @@ static void ext4_falloc_update_inode(struct inode *inode,
}
/*
- * preallocate space for a file. This implements ext4's fallocate inode
+ * preallocate space for a file. This implements ext4's fallocate file
* operation, which gets called from sys_fallocate system call.
* For block-mapped files, posix_fallocate should fall back to the method
* of writing zeroes to the required new blocks (the same behavior which is
* expected for file systems which do not support fallocate() system call).
*/
-long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
+long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
+ struct inode *inode = file->f_path.dentry->d_inode;
handle_t *handle;
loff_t new_size;
unsigned int max_blocks;
@@ -3645,7 +3646,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
unsigned int credits, blkbits = inode->i_blkbits;
/* We only support the FALLOC_FL_KEEP_SIZE mode */
- if (mode && (mode != FALLOC_FL_KEEP_SIZE))
+ if (mode & ~FALLOC_FL_KEEP_SIZE)
return -EOPNOTSUPP;
/*
@@ -3655,10 +3656,6 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
return -EOPNOTSUPP;
- /* preallocation to directories is currently not supported */
- if (S_ISDIR(inode->i_mode))
- return -ENODEV;
-
map.m_lblk = offset >> blkbits;
/*
* We can't just convert len to max_blocks because
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index bb003dc9ffff..2e8322c8aa88 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -210,6 +210,7 @@ const struct file_operations ext4_file_operations = {
.fsync = ext4_sync_file,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
+ .fallocate = ext4_fallocate,
};
const struct inode_operations ext4_file_inode_operations = {
@@ -223,7 +224,6 @@ const struct inode_operations ext4_file_inode_operations = {
.removexattr = generic_removexattr,
#endif
.check_acl = ext4_check_acl,
- .fallocate = ext4_fallocate,
.fiemap = ext4_fiemap,
};
diff --git a/fs/file_table.c b/fs/file_table.c
index c3dee381f1b4..c3e89adf53c0 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -311,7 +311,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
struct files_struct *files = current->files;
*fput_needed = 0;
- if (likely((atomic_read(&files->count) == 1))) {
+ if (atomic_read(&files->count) == 1) {
file = fcheck_files(files, fd);
} else {
rcu_read_lock();
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index fca6689e12e6..7cfdcb913363 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -19,6 +19,8 @@
#include <linux/fs.h>
#include <linux/gfs2_ondisk.h>
#include <linux/ext2_fs.h>
+#include <linux/falloc.h>
+#include <linux/swap.h>
#include <linux/crc32.h>
#include <linux/writeback.h>
#include <asm/uaccess.h>
@@ -610,6 +612,260 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
return generic_file_aio_write(iocb, iov, nr_segs, pos);
}
+static void empty_write_end(struct page *page, unsigned from,
+ unsigned to)
+{
+ struct gfs2_inode *ip = GFS2_I(page->mapping->host);
+
+ page_zero_new_buffers(page, from, to);
+ flush_dcache_page(page);
+ mark_page_accessed(page);
+
+ if (!gfs2_is_writeback(ip))
+ gfs2_page_add_databufs(ip, page, from, to);
+
+ block_commit_write(page, from, to);
+}
+
+static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
+{
+ unsigned start, end, next;
+ struct buffer_head *bh, *head;
+ int error;
+
+ if (!page_has_buffers(page)) {
+ error = __block_write_begin(page, from, to - from, gfs2_block_map);
+ if (unlikely(error))
+ return error;
+
+ empty_write_end(page, from, to);
+ return 0;
+ }
+
+ bh = head = page_buffers(page);
+ next = end = 0;
+ while (next < from) {
+ next += bh->b_size;
+ bh = bh->b_this_page;
+ }
+ start = next;
+ do {
+ next += bh->b_size;
+ if (buffer_mapped(bh)) {
+ if (end) {
+ error = __block_write_begin(page, start, end - start,
+ gfs2_block_map);
+ if (unlikely(error))
+ return error;
+ empty_write_end(page, start, end);
+ end = 0;
+ }
+ start = next;
+ }
+ else
+ end = next;
+ bh = bh->b_this_page;
+ } while (next < to);
+
+ if (end) {
+ error = __block_write_begin(page, start, end - start, gfs2_block_map);
+ if (unlikely(error))
+ return error;
+ empty_write_end(page, start, end);
+ }
+
+ return 0;
+}
+
+static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
+ int mode)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct buffer_head *dibh;
+ int error;
+ u64 start = offset >> PAGE_CACHE_SHIFT;
+ unsigned int start_offset = offset & ~PAGE_CACHE_MASK;
+ u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
+ pgoff_t curr;
+ struct page *page;
+ unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK;
+ unsigned int from, to;
+
+ if (!end_offset)
+ end_offset = PAGE_CACHE_SIZE;
+
+ error = gfs2_meta_inode_buffer(ip, &dibh);
+ if (unlikely(error))
+ goto out;
+
+ gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+
+ if (gfs2_is_stuffed(ip)) {
+ error = gfs2_unstuff_dinode(ip, NULL);
+ if (unlikely(error))
+ goto out;
+ }
+
+ curr = start;
+ offset = start << PAGE_CACHE_SHIFT;
+ from = start_offset;
+ to = PAGE_CACHE_SIZE;
+ while (curr <= end) {
+ page = grab_cache_page_write_begin(inode->i_mapping, curr,
+ AOP_FLAG_NOFS);
+ if (unlikely(!page)) {
+ error = -ENOMEM;
+ goto out;
+ }
+
+ if (curr == end)
+ to = end_offset;
+ error = write_empty_blocks(page, from, to);
+ if (!error && offset + to > inode->i_size &&
+ !(mode & FALLOC_FL_KEEP_SIZE)) {
+ i_size_write(inode, offset + to);
+ }
+ unlock_page(page);
+ page_cache_release(page);
+ if (error)
+ goto out;
+ curr++;
+ offset += PAGE_CACHE_SIZE;
+ from = 0;
+ }
+
+ gfs2_dinode_out(ip, dibh->b_data);
+ mark_inode_dirty(inode);
+
+ brelse(dibh);
+
+out:
+ return error;
+}
+
+static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
+ unsigned int *data_blocks, unsigned int *ind_blocks)
+{
+ const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone;
+ unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
+
+ for (tmp = max_data; tmp > sdp->sd_diptrs;) {
+ tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
+ max_data -= tmp;
+ }
+ /* This calculation isn't the exact reverse of gfs2_write_calc_reserve,
+ so it might end up with fewer data blocks */
+ if (max_data <= *data_blocks)
+ return;
+ *data_blocks = max_data;
+ *ind_blocks = max_blocks - max_data;
+ *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
+ if (*len > max) {
+ *len = max;
+ gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks);
+ }
+}
+
+static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
+ loff_t len)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+ struct gfs2_inode *ip = GFS2_I(inode);
+ unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
+ loff_t bytes, max_bytes;
+ struct gfs2_alloc *al;
+ int error;
+ loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
+ next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
+
+ /* We only support the FALLOC_FL_KEEP_SIZE mode */
+ if (mode & ~FALLOC_FL_KEEP_SIZE)
+ return -EOPNOTSUPP;
+
+ offset = (offset >> sdp->sd_sb.sb_bsize_shift) <<
+ sdp->sd_sb.sb_bsize_shift;
+
+ len = next - offset;
+ bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
+ if (!bytes)
+ bytes = UINT_MAX;
+
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
+ error = gfs2_glock_nq(&ip->i_gh);
+ if (unlikely(error))
+ goto out_uninit;
+
+ if (!gfs2_write_alloc_required(ip, offset, len))
+ goto out_unlock;
+
+ while (len > 0) {
+ if (len < bytes)
+ bytes = len;
+ al = gfs2_alloc_get(ip);
+ if (!al) {
+ error = -ENOMEM;
+ goto out_unlock;
+ }
+
+ error = gfs2_quota_lock_check(ip);
+ if (error)
+ goto out_alloc_put;
+
+retry:
+ gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
+
+ al->al_requested = data_blocks + ind_blocks;
+ error = gfs2_inplace_reserve(ip);
+ if (error) {
+ if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
+ bytes >>= 1;
+ goto retry;
+ }
+ goto out_qunlock;
+ }
+ max_bytes = bytes;
+ calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks);
+ al->al_requested = data_blocks + ind_blocks;
+
+ rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
+ RES_RG_HDR + gfs2_rg_blocks(al);
+ if (gfs2_is_jdata(ip))
+ rblocks += data_blocks ? data_blocks : 1;
+
+ error = gfs2_trans_begin(sdp, rblocks,
+ PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
+ if (error)
+ goto out_trans_fail;
+
+ error = fallocate_chunk(inode, offset, max_bytes, mode);
+ gfs2_trans_end(sdp);
+
+ if (error)
+ goto out_trans_fail;
+
+ len -= max_bytes;
+ offset += max_bytes;
+ gfs2_inplace_release(ip);
+ gfs2_quota_unlock(ip);
+ gfs2_alloc_put(ip);
+ }
+ goto out_unlock;
+
+out_trans_fail:
+ gfs2_inplace_release(ip);
+out_qunlock:
+ gfs2_quota_unlock(ip);
+out_alloc_put:
+ gfs2_alloc_put(ip);
+out_unlock:
+ gfs2_glock_dq(&ip->i_gh);
+out_uninit:
+ gfs2_holder_uninit(&ip->i_gh);
+ return error;
+}
+
#ifdef CONFIG_GFS2_FS_LOCKING_DLM
/**
@@ -765,6 +1021,7 @@ const struct file_operations gfs2_file_fops = {
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
.setlease = gfs2_setlease,
+ .fallocate = gfs2_fallocate,
};
const struct file_operations gfs2_dir_fops = {
@@ -794,6 +1051,7 @@ const struct file_operations gfs2_file_fops_nolock = {
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
.setlease = generic_setlease,
+ .fallocate = gfs2_fallocate,
};
const struct file_operations gfs2_dir_fops_nolock = {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2232b3c780bd..7aa7d4f8984a 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -74,16 +74,14 @@ static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
}
/**
- * GFS2 lookup code fills in vfs inode contents based on info obtained
- * from directory entry inside gfs2_inode_lookup(). This has caused issues
- * with NFS code path since its get_dentry routine doesn't have the relevant
- * directory entry when gfs2_inode_lookup() is invoked. Part of the code
- * segment inside gfs2_inode_lookup code needs to get moved around.
+ * gfs2_set_iop - Sets inode operations
+ * @inode: The inode with correct i_mode filled in
*
- * Clears I_NEW as well.
- **/
+ * GFS2 lookup code fills in vfs inode contents based on info obtained
+ * from directory entry inside gfs2_inode_lookup().
+ */
-void gfs2_set_iop(struct inode *inode)
+static void gfs2_set_iop(struct inode *inode)
{
struct gfs2_sbd *sdp = GFS2_SB(inode);
umode_t mode = inode->i_mode;
@@ -106,8 +104,6 @@ void gfs2_set_iop(struct inode *inode)
inode->i_op = &gfs2_file_iops;
init_special_inode(inode, inode->i_mode, inode->i_rdev);
}
-
- unlock_new_inode(inode);
}
/**
@@ -119,10 +115,8 @@ void gfs2_set_iop(struct inode *inode)
* Returns: A VFS inode, or an error
*/
-struct inode *gfs2_inode_lookup(struct super_block *sb,
- unsigned int type,
- u64 no_addr,
- u64 no_formal_ino)
+struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
+ u64 no_addr, u64 no_formal_ino)
{
struct inode *inode;
struct gfs2_inode *ip;
@@ -152,51 +146,37 @@ struct inode *gfs2_inode_lookup(struct super_block *sb,
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
if (unlikely(error))
goto fail_iopen;
- ip->i_iopen_gh.gh_gl->gl_object = ip;
+ ip->i_iopen_gh.gh_gl->gl_object = ip;
gfs2_glock_put(io_gl);
io_gl = NULL;
- if ((type == DT_UNKNOWN) && (no_formal_ino == 0))
- goto gfs2_nfsbypass;
-
- inode->i_mode = DT2IF(type);
-
- /*
- * We must read the inode in order to work out its type in
- * this case. Note that this doesn't happen often as we normally
- * know the type beforehand. This code path only occurs during
- * unlinked inode recovery (where it is safe to do this glock,
- * which is not true in the general case).
- */
if (type == DT_UNKNOWN) {
- struct gfs2_holder gh;
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
- if (unlikely(error))
- goto fail_glock;
- /* Inode is now uptodate */
- gfs2_glock_dq_uninit(&gh);
+ /* Inode glock must be locked already */
+ error = gfs2_inode_refresh(GFS2_I(inode));
+ if (error)
+ goto fail_refresh;
+ } else {
+ inode->i_mode = DT2IF(type);
}
gfs2_set_iop(inode);
+ unlock_new_inode(inode);
}
-gfs2_nfsbypass:
return inode;
-fail_glock:
- gfs2_glock_dq(&ip->i_iopen_gh);
+
+fail_refresh:
+ ip->i_iopen_gh.gh_gl->gl_object = NULL;
+ gfs2_glock_dq_uninit(&ip->i_iopen_gh);
fail_iopen:
if (io_gl)
gfs2_glock_put(io_gl);
fail_put:
- if (inode->i_state & I_NEW)
- ip->i_gl->gl_object = NULL;
+ ip->i_gl->gl_object = NULL;
gfs2_glock_put(ip->i_gl);
fail:
- if (inode->i_state & I_NEW)
- iget_failed(inode);
- else
- iput(inode);
+ iget_failed(inode);
return ERR_PTR(error);
}
@@ -221,14 +201,6 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
if (IS_ERR(inode))
goto fail;
- error = gfs2_inode_refresh(GFS2_I(inode));
- if (error)
- goto fail_iput;
-
- /* Pick up the works we bypass in gfs2_inode_lookup */
- if (inode->i_state & I_NEW)
- gfs2_set_iop(inode);
-
/* Two extra checks for NFS only */
if (no_formal_ino) {
error = -ESTALE;
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 732a183efdb3..3e00a66e7cbd 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -96,7 +96,6 @@ err:
return -EIO;
}
-extern void gfs2_set_iop(struct inode *inode);
extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
u64 no_addr, u64 no_formal_ino);
extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 040b5a2e6556..d8b26ac2e20b 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -18,8 +18,6 @@
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/fiemap.h>
-#include <linux/swap.h>
-#include <linux/falloc.h>
#include <asm/uaccess.h>
#include "gfs2.h"
@@ -1257,261 +1255,6 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
return ret;
}
-static void empty_write_end(struct page *page, unsigned from,
- unsigned to)
-{
- struct gfs2_inode *ip = GFS2_I(page->mapping->host);
-
- page_zero_new_buffers(page, from, to);
- flush_dcache_page(page);
- mark_page_accessed(page);
-
- if (!gfs2_is_writeback(ip))
- gfs2_page_add_databufs(ip, page, from, to);
-
- block_commit_write(page, from, to);
-}
-
-
-static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
-{
- unsigned start, end, next;
- struct buffer_head *bh, *head;
- int error;
-
- if (!page_has_buffers(page)) {
- error = __block_write_begin(page, from, to - from, gfs2_block_map);
- if (unlikely(error))
- return error;
-
- empty_write_end(page, from, to);
- return 0;
- }
-
- bh = head = page_buffers(page);
- next = end = 0;
- while (next < from) {
- next += bh->b_size;
- bh = bh->b_this_page;
- }
- start = next;
- do {
- next += bh->b_size;
- if (buffer_mapped(bh)) {
- if (end) {
- error = __block_write_begin(page, start, end - start,
- gfs2_block_map);
- if (unlikely(error))
- return error;
- empty_write_end(page, start, end);
- end = 0;
- }
- start = next;
- }
- else
- end = next;
- bh = bh->b_this_page;
- } while (next < to);
-
- if (end) {
- error = __block_write_begin(page, start, end - start, gfs2_block_map);
- if (unlikely(error))
- return error;
- empty_write_end(page, start, end);
- }
-
- return 0;
-}
-
-static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
- int mode)
-{
- struct gfs2_inode *ip = GFS2_I(inode);
- struct buffer_head *dibh;
- int error;
- u64 start = offset >> PAGE_CACHE_SHIFT;
- unsigned int start_offset = offset & ~PAGE_CACHE_MASK;
- u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
- pgoff_t curr;
- struct page *page;
- unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK;
- unsigned int from, to;
-
- if (!end_offset)
- end_offset = PAGE_CACHE_SIZE;
-
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (unlikely(error))
- goto out;
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-
- if (gfs2_is_stuffed(ip)) {
- error = gfs2_unstuff_dinode(ip, NULL);
- if (unlikely(error))
- goto out;
- }
-
- curr = start;
- offset = start << PAGE_CACHE_SHIFT;
- from = start_offset;
- to = PAGE_CACHE_SIZE;
- while (curr <= end) {
- page = grab_cache_page_write_begin(inode->i_mapping, curr,
- AOP_FLAG_NOFS);
- if (unlikely(!page)) {
- error = -ENOMEM;
- goto out;
- }
-
- if (curr == end)
- to = end_offset;
- error = write_empty_blocks(page, from, to);
- if (!error && offset + to > inode->i_size &&
- !(mode & FALLOC_FL_KEEP_SIZE)) {
- i_size_write(inode, offset + to);
- }
- unlock_page(page);
- page_cache_release(page);
- if (error)
- goto out;
- curr++;
- offset += PAGE_CACHE_SIZE;
- from = 0;
- }
-
- gfs2_dinode_out(ip, dibh->b_data);
- mark_inode_dirty(inode);
-
- brelse(dibh);
-
-out:
- return error;
-}
-
-static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
- unsigned int *data_blocks, unsigned int *ind_blocks)
-{
- const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone;
- unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
-
- for (tmp = max_data; tmp > sdp->sd_diptrs;) {
- tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
- max_data -= tmp;
- }
- /* This calculation isn't the exact reverse of gfs2_write_calc_reserve,
- so it might end up with fewer data blocks */
- if (max_data <= *data_blocks)
- return;
- *data_blocks = max_data;
- *ind_blocks = max_blocks - max_data;
- *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
- if (*len > max) {
- *len = max;
- gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks);
- }
-}
-
-static long gfs2_fallocate(struct inode *inode, int mode, loff_t offset,
- loff_t len)
-{
- struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct gfs2_inode *ip = GFS2_I(inode);
- unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
- loff_t bytes, max_bytes;
- struct gfs2_alloc *al;
- int error;
- loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
- next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
-
- /* We only support the FALLOC_FL_KEEP_SIZE mode */
- if (mode && (mode != FALLOC_FL_KEEP_SIZE))
- return -EOPNOTSUPP;
-
- offset = (offset >> sdp->sd_sb.sb_bsize_shift) <<
- sdp->sd_sb.sb_bsize_shift;
-
- len = next - offset;
- bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
- if (!bytes)
- bytes = UINT_MAX;
-
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
- error = gfs2_glock_nq(&ip->i_gh);
- if (unlikely(error))
- goto out_uninit;
-
- if (!gfs2_write_alloc_required(ip, offset, len))
- goto out_unlock;
-
- while (len > 0) {
- if (len < bytes)
- bytes = len;
- al = gfs2_alloc_get(ip);
- if (!al) {
- error = -ENOMEM;
- goto out_unlock;
- }
-
- error = gfs2_quota_lock_check(ip);
- if (error)
- goto out_alloc_put;
-
-retry:
- gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
-
- al->al_requested = data_blocks + ind_blocks;
- error = gfs2_inplace_reserve(ip);
- if (error) {
- if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
- bytes >>= 1;
- goto retry;
- }
- goto out_qunlock;
- }
- max_bytes = bytes;
- calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks);
- al->al_requested = data_blocks + ind_blocks;
-
- rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
- RES_RG_HDR + gfs2_rg_blocks(al);
- if (gfs2_is_jdata(ip))
- rblocks += data_blocks ? data_blocks : 1;
-
- error = gfs2_trans_begin(sdp, rblocks,
- PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
- if (error)
- goto out_trans_fail;
-
- error = fallocate_chunk(inode, offset, max_bytes, mode);
- gfs2_trans_end(sdp);
-
- if (error)
- goto out_trans_fail;
-
- len -= max_bytes;
- offset += max_bytes;
- gfs2_inplace_release(ip);
- gfs2_quota_unlock(ip);
- gfs2_alloc_put(ip);
- }
- goto out_unlock;
-
-out_trans_fail:
- gfs2_inplace_release(ip);
-out_qunlock:
- gfs2_quota_unlock(ip);
-out_alloc_put:
- gfs2_alloc_put(ip);
-out_unlock:
- gfs2_glock_dq(&ip->i_gh);
-out_uninit:
- gfs2_holder_uninit(&ip->i_gh);
- return error;
-}
-
-
static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
@@ -1562,7 +1305,6 @@ const struct inode_operations gfs2_file_iops = {
.getxattr = gfs2_getxattr,
.listxattr = gfs2_listxattr,
.removexattr = gfs2_removexattr,
- .fallocate = gfs2_fallocate,
.fiemap = gfs2_fiemap,
};
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 16c2ecac7eb7..ec73ed70bae1 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1336,6 +1336,7 @@ static void gfs2_evict_inode(struct inode *inode)
if (error)
goto out_truncate;
+ ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_wait(&ip->i_iopen_gh);
gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
error = gfs2_glock_nq(&ip->i_iopen_gh);
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 56f0da1cfd10..1ae35baa539e 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -281,7 +281,7 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
attr->ia_size != i_size_read(inode)) {
error = vmtruncate(inode, attr->ia_size);
if (error)
- return error;
+ goto out_unlock;
}
setattr_copy(inode, attr);
diff --git a/fs/internal.h b/fs/internal.h
index 12ccb86edef7..0663568b1247 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -70,8 +70,7 @@ extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
extern void release_mounts(struct list_head *);
extern void umount_tree(struct vfsmount *, int, struct list_head *);
extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
-extern int do_add_mount(struct vfsmount *, struct path *, int);
-extern void mnt_clear_expiry(struct vfsmount *);
+extern int finish_automount(struct vfsmount *, struct path *);
extern void mnt_make_longterm(struct vfsmount *);
extern void mnt_make_shortterm(struct vfsmount *);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index d6cc16476620..a59635e295fa 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -86,7 +86,7 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
u64 phys, u64 len, u32 flags)
{
struct fiemap_extent extent;
- struct fiemap_extent *dest = fieinfo->fi_extents_start;
+ struct fiemap_extent __user *dest = fieinfo->fi_extents_start;
/* only count the extents */
if (fieinfo->fi_extents_max == 0) {
@@ -173,6 +173,7 @@ static int fiemap_check_ranges(struct super_block *sb,
static int ioctl_fiemap(struct file *filp, unsigned long arg)
{
struct fiemap fiemap;
+ struct fiemap __user *ufiemap = (struct fiemap __user *) arg;
struct fiemap_extent_info fieinfo = { 0, };
struct inode *inode = filp->f_path.dentry->d_inode;
struct super_block *sb = inode->i_sb;
@@ -182,8 +183,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
if (!inode->i_op->fiemap)
return -EOPNOTSUPP;
- if (copy_from_user(&fiemap, (struct fiemap __user *)arg,
- sizeof(struct fiemap)))
+ if (copy_from_user(&fiemap, ufiemap, sizeof(fiemap)))
return -EFAULT;
if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS)
@@ -196,7 +196,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
fieinfo.fi_flags = fiemap.fm_flags;
fieinfo.fi_extents_max = fiemap.fm_extent_count;
- fieinfo.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(fiemap));
+ fieinfo.fi_extents_start = ufiemap->fm_extents;
if (fiemap.fm_extent_count != 0 &&
!access_ok(VERIFY_WRITE, fieinfo.fi_extents_start,
@@ -209,7 +209,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, len);
fiemap.fm_flags = fieinfo.fi_flags;
fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped;
- if (copy_to_user((char *)arg, &fiemap, sizeof(fiemap)))
+ if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap)))
error = -EFAULT;
return error;
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index 85c6be2db02f..3005ec4520ad 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -336,14 +336,13 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c)
size = sizeof(struct jffs2_eraseblock) * c->nr_blocks;
#ifndef __ECOS
if (jffs2_blocks_use_vmalloc(c))
- c->blocks = vmalloc(size);
+ c->blocks = vzalloc(size);
else
#endif
- c->blocks = kmalloc(size, GFP_KERNEL);
+ c->blocks = kzalloc(size, GFP_KERNEL);
if (!c->blocks)
return -ENOMEM;
- memset(c->blocks, 0, size);
for (i=0; i<c->nr_blocks; i++) {
INIT_LIST_HEAD(&c->blocks[i].list);
c->blocks[i].offset = i * c->sector_size;
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index f864005de64c..0bc6a6c80a56 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -144,4 +144,4 @@ struct jffs2_sb_info {
void *os_priv;
};
-#endif /* _JFFS2_FB_SB */
+#endif /* _JFFS2_FS_SB */
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 9b572ca40a49..4f9cc0482949 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -151,7 +151,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat
JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
offset, je32_to_cpu(rx.hdr_crc), crc);
xd->flags |= JFFS2_XFLAGS_INVALID;
- return EIO;
+ return -EIO;
}
totlen = PAD(sizeof(rx) + rx.name_len + 1 + je16_to_cpu(rx.value_len));
if (je16_to_cpu(rx.magic) != JFFS2_MAGIC_BITMASK
@@ -167,7 +167,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat
je32_to_cpu(rx.xid), xd->xid,
je32_to_cpu(rx.version), xd->version);
xd->flags |= JFFS2_XFLAGS_INVALID;
- return EIO;
+ return -EIO;
}
xd->xprefix = rx.xprefix;
xd->name_len = rx.name_len;
@@ -230,7 +230,7 @@ static int do_load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum
ref_offset(xd->node), xd->data_crc, crc);
kfree(data);
xd->flags |= JFFS2_XFLAGS_INVALID;
- return EIO;
+ return -EIO;
}
xd->flags |= JFFS2_XFLAGS_HOT;
@@ -268,7 +268,7 @@ static int load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *x
if (xd->xname)
return 0;
if (xd->flags & JFFS2_XFLAGS_INVALID)
- return EIO;
+ return -EIO;
if (unlikely(is_xattr_datum_unchecked(c, xd)))
rc = do_verify_xattr_datum(c, xd);
if (!rc)
@@ -460,7 +460,7 @@ static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref
if (crc != je32_to_cpu(rr.node_crc)) {
JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
offset, je32_to_cpu(rr.node_crc), crc);
- return EIO;
+ return -EIO;
}
if (je16_to_cpu(rr.magic) != JFFS2_MAGIC_BITMASK
|| je16_to_cpu(rr.nodetype) != JFFS2_NODETYPE_XREF
@@ -470,7 +470,7 @@ static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref
offset, je16_to_cpu(rr.magic), JFFS2_MAGIC_BITMASK,
je16_to_cpu(rr.nodetype), JFFS2_NODETYPE_XREF,
je32_to_cpu(rr.totlen), PAD(sizeof(rr)));
- return EIO;
+ return -EIO;
}
ref->ino = je32_to_cpu(rr.ino);
ref->xid = je32_to_cpu(rr.xid);
diff --git a/fs/namei.c b/fs/namei.c
index 8f7b41a14882..7d77f24d32a9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -923,37 +923,13 @@ static int follow_automount(struct path *path, unsigned flags,
if (!mnt) /* mount collision */
return 0;
- /* The new mount record should have at least 2 refs to prevent it being
- * expired before we get a chance to add it
- */
- BUG_ON(mnt_get_count(mnt) < 2);
-
- if (mnt->mnt_sb == path->mnt->mnt_sb &&
- mnt->mnt_root == path->dentry) {
- mnt_clear_expiry(mnt);
- mntput(mnt);
- mntput(mnt);
- return -ELOOP;
- }
+ err = finish_automount(mnt, path);
- /* We need to add the mountpoint to the parent. The filesystem may
- * have placed it on an expiry list, and so we need to make sure it
- * won't be expired under us if do_add_mount() fails (do_add_mount()
- * will eat a reference unconditionally).
- */
- mntget(mnt);
- err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
switch (err) {
case -EBUSY:
/* Someone else made a mount here whilst we were busy */
- err = 0;
- default:
- mnt_clear_expiry(mnt);
- mntput(mnt);
- mntput(mnt);
- return err;
+ return 0;
case 0:
- mntput(mnt);
dput(path->dentry);
if (*need_mntput)
mntput(path->mnt);
@@ -961,7 +937,10 @@ static int follow_automount(struct path *path, unsigned flags,
path->dentry = dget(mnt->mnt_root);
*need_mntput = true;
return 0;
+ default:
+ return err;
}
+
}
/*
@@ -1293,8 +1272,10 @@ done:
path->mnt = mnt;
path->dentry = dentry;
err = follow_managed(path, nd->flags);
- if (unlikely(err < 0))
+ if (unlikely(err < 0)) {
+ path_put_conditional(path, nd);
return err;
+ }
*inode = path->dentry->d_inode;
return 0;
diff --git a/fs/namespace.c b/fs/namespace.c
index 9f544f35ed34..7b0b95371696 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1872,6 +1872,8 @@ out:
return err;
}
+static int do_add_mount(struct vfsmount *, struct path *, int);
+
/*
* create a new mount for userspace and request it to be added into the
* namespace's tree
@@ -1880,6 +1882,7 @@ static int do_new_mount(struct path *path, char *type, int flags,
int mnt_flags, char *name, void *data)
{
struct vfsmount *mnt;
+ int err;
if (!type)
return -EINVAL;
@@ -1892,14 +1895,47 @@ static int do_new_mount(struct path *path, char *type, int flags,
if (IS_ERR(mnt))
return PTR_ERR(mnt);
- return do_add_mount(mnt, path, mnt_flags);
+ err = do_add_mount(mnt, path, mnt_flags);
+ if (err)
+ mntput(mnt);
+ return err;
+}
+
+int finish_automount(struct vfsmount *m, struct path *path)
+{
+ int err;
+ /* The new mount record should have at least 2 refs to prevent it being
+ * expired before we get a chance to add it
+ */
+ BUG_ON(mnt_get_count(m) < 2);
+
+ if (m->mnt_sb == path->mnt->mnt_sb &&
+ m->mnt_root == path->dentry) {
+ err = -ELOOP;
+ goto fail;
+ }
+
+ err = do_add_mount(m, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
+ if (!err)
+ return 0;
+fail:
+ /* remove m from any expiration list it may be on */
+ if (!list_empty(&m->mnt_expire)) {
+ down_write(&namespace_sem);
+ br_write_lock(vfsmount_lock);
+ list_del_init(&m->mnt_expire);
+ br_write_unlock(vfsmount_lock);
+ up_write(&namespace_sem);
+ }
+ mntput(m);
+ mntput(m);
+ return err;
}
/*
* add a mount into a namespace's mount tree
- * - this unconditionally eats one of the caller's references to newmnt.
*/
-int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags)
+static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags)
{
int err;
@@ -1926,15 +1962,10 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags)
goto unlock;
newmnt->mnt_flags = mnt_flags;
- if ((err = graft_tree(newmnt, path)))
- goto unlock;
-
- up_write(&namespace_sem);
- return 0;
+ err = graft_tree(newmnt, path);
unlock:
up_write(&namespace_sem);
- mntput(newmnt);
return err;
}
@@ -1956,20 +1987,6 @@ void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
EXPORT_SYMBOL(mnt_set_expiry);
/*
- * Remove a vfsmount from any expiration list it may be on
- */
-void mnt_clear_expiry(struct vfsmount *mnt)
-{
- if (!list_empty(&mnt->mnt_expire)) {
- down_write(&namespace_sem);
- br_write_lock(vfsmount_lock);
- list_del_init(&mnt->mnt_expire);
- br_write_unlock(vfsmount_lock);
- up_write(&namespace_sem);
- }
-}
-
-/*
* process a list of expirable mountpoints with the intent of discarding any
* mountpoints that aren't in use and haven't been touched since last we came
* here
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 63e3fca266e0..a6651956482e 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1989,20 +1989,20 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
}
-static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset,
+static long ocfs2_fallocate(struct file *file, int mode, loff_t offset,
loff_t len)
{
+ struct inode *inode = file->f_path.dentry->d_inode;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_space_resv sr;
int change_size = 1;
int cmd = OCFS2_IOC_RESVSP64;
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ return -EOPNOTSUPP;
if (!ocfs2_writes_unwritten_extents(osb))
return -EOPNOTSUPP;
- if (S_ISDIR(inode->i_mode))
- return -ENODEV;
-
if (mode & FALLOC_FL_KEEP_SIZE)
change_size = 0;
@@ -2610,7 +2610,6 @@ const struct inode_operations ocfs2_file_iops = {
.getxattr = generic_getxattr,
.listxattr = ocfs2_listxattr,
.removexattr = generic_removexattr,
- .fallocate = ocfs2_fallocate,
.fiemap = ocfs2_fiemap,
};
@@ -2642,6 +2641,7 @@ const struct file_operations ocfs2_fops = {
.flock = ocfs2_flock,
.splice_read = ocfs2_file_splice_read,
.splice_write = ocfs2_file_splice_write,
+ .fallocate = ocfs2_fallocate,
};
const struct file_operations ocfs2_dops = {
diff --git a/fs/open.c b/fs/open.c
index 5b6ef7e2859e..e52389e1f05b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -255,10 +255,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
return -EFBIG;
- if (!inode->i_op->fallocate)
+ if (!file->f_op->fallocate)
return -EOPNOTSUPP;
- return inode->i_op->fallocate(inode, mode, offset, len);
+ return file->f_op->fallocate(file, mode, offset, len);
}
SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
diff --git a/fs/pipe.c b/fs/pipe.c
index 89e9e19b1b2e..da42f7db50de 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -441,7 +441,7 @@ redo:
break;
}
if (do_wakeup) {
- wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT);
+ wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
pipe_wait(pipe);
@@ -450,7 +450,7 @@ redo:
/* Signal writers asynchronously that there is more room. */
if (do_wakeup) {
- wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT);
+ wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
if (ret > 0)
@@ -612,7 +612,7 @@ redo2:
break;
}
if (do_wakeup) {
- wake_up_interruptible_sync_poll(&pipe->wait, POLLIN);
+ wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
do_wakeup = 0;
}
@@ -623,7 +623,7 @@ redo2:
out:
mutex_unlock(&inode->i_mutex);
if (do_wakeup) {
- wake_up_interruptible_sync_poll(&pipe->wait, POLLIN);
+ wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
}
if (ret > 0)
@@ -715,7 +715,7 @@ pipe_release(struct inode *inode, int decr, int decw)
if (!pipe->readers && !pipe->writers) {
free_pipe_info(inode);
} else {
- wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT);
+ wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index ef51eb43e137..a55c1b46b219 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -37,6 +37,7 @@
#include "xfs_trace.h"
#include <linux/dcache.h>
+#include <linux/falloc.h>
static const struct vm_operations_struct xfs_file_vm_ops;
@@ -882,6 +883,60 @@ out_unlock:
return ret;
}
+STATIC long
+xfs_file_fallocate(
+ struct file *file,
+ int mode,
+ loff_t offset,
+ loff_t len)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ long error;
+ loff_t new_size = 0;
+ xfs_flock64_t bf;
+ xfs_inode_t *ip = XFS_I(inode);
+ int cmd = XFS_IOC_RESVSP;
+
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ return -EOPNOTSUPP;
+
+ bf.l_whence = 0;
+ bf.l_start = offset;
+ bf.l_len = len;
+
+ xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+ if (mode & FALLOC_FL_PUNCH_HOLE)
+ cmd = XFS_IOC_UNRESVSP;
+
+ /* check the new inode size is valid before allocating */
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ offset + len > i_size_read(inode)) {
+ new_size = offset + len;
+ error = inode_newsize_ok(inode, new_size);
+ if (error)
+ goto out_unlock;
+ }
+
+ error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK);
+ if (error)
+ goto out_unlock;
+
+ /* Change file size if needed */
+ if (new_size) {
+ struct iattr iattr;
+
+ iattr.ia_valid = ATTR_SIZE;
+ iattr.ia_size = new_size;
+ error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
+ }
+
+out_unlock:
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ return error;
+}
+
+
STATIC int
xfs_file_open(
struct inode *inode,
@@ -1000,6 +1055,7 @@ const struct file_operations xfs_file_operations = {
.open = xfs_file_open,
.release = xfs_file_release,
.fsync = xfs_file_fsync,
+ .fallocate = xfs_file_fallocate,
};
const struct file_operations xfs_dir_file_operations = {
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index da54403633b6..bd5727852fd6 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -46,7 +46,6 @@
#include <linux/namei.h>
#include <linux/posix_acl.h>
#include <linux/security.h>
-#include <linux/falloc.h>
#include <linux/fiemap.h>
#include <linux/slab.h>
@@ -505,61 +504,6 @@ xfs_vn_setattr(
return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
}
-STATIC long
-xfs_vn_fallocate(
- struct inode *inode,
- int mode,
- loff_t offset,
- loff_t len)
-{
- long error;
- loff_t new_size = 0;
- xfs_flock64_t bf;
- xfs_inode_t *ip = XFS_I(inode);
- int cmd = XFS_IOC_RESVSP;
-
- /* preallocation on directories not yet supported */
- error = -ENODEV;
- if (S_ISDIR(inode->i_mode))
- goto out_error;
-
- bf.l_whence = 0;
- bf.l_start = offset;
- bf.l_len = len;
-
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
- if (mode & FALLOC_FL_PUNCH_HOLE)
- cmd = XFS_IOC_UNRESVSP;
-
- /* check the new inode size is valid before allocating */
- if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- offset + len > i_size_read(inode)) {
- new_size = offset + len;
- error = inode_newsize_ok(inode, new_size);
- if (error)
- goto out_unlock;
- }
-
- error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK);
- if (error)
- goto out_unlock;
-
- /* Change file size if needed */
- if (new_size) {
- struct iattr iattr;
-
- iattr.ia_valid = ATTR_SIZE;
- iattr.ia_size = new_size;
- error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
- }
-
-out_unlock:
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-out_error:
- return error;
-}
-
#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
/*
@@ -653,7 +597,6 @@ static const struct inode_operations xfs_inode_operations = {
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
- .fallocate = xfs_vn_fallocate,
.fiemap = xfs_vn_fiemap,
};
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index e6cf955ec0fc..0df88897ef84 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -75,11 +75,11 @@ xfs_cmn_err(
{
struct va_format vaf;
va_list args;
- int panic = 0;
+ int do_panic = 0;
if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
printk(KERN_ALERT "XFS: Transforming an alert into a BUG.");
- panic = 1;
+ do_panic = 1;
}
va_start(args, fmt);
@@ -89,7 +89,7 @@ xfs_cmn_err(
printk(KERN_ALERT "Filesystem %s: %pV", mp->m_fsname, &vaf);
va_end(args);
- BUG_ON(panic);
+ BUG_ON(do_panic);
}
void