diff options
Diffstat (limited to 'fs')
271 files changed, 3790 insertions, 5513 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 14da82564f4e..6894b085f0ee 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -537,7 +537,7 @@ static struct attribute_group v9fs_attr_group = { * */ -static int v9fs_sysfs_init(void) +static int __init v9fs_sysfs_init(void) { v9fs_kobj = kobject_create_and_add("9p", fs_kobj); if (!v9fs_kobj) diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 4d0c2e0be7e5..0b3bfa303dda 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -42,7 +42,6 @@ /** * struct p9_rdir - readdir accounting - * @mutex: mutex protecting readdir * @head: start offset of current dirread buffer * @tail: end offset of current dirread buffer * @buf: dirread buffer diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index d8223209d4b1..96e550760699 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -352,9 +352,6 @@ static int v9fs_file_flock_dotl(struct file *filp, int cmd, invalidate_mapping_pages(&inode->i_data, 0, -1); } /* Convert flock to posix lock */ - fl->fl_owner = (fl_owner_t)filp; - fl->fl_start = 0; - fl->fl_end = OFFSET_MAX; fl->fl_flags |= FL_POSIX; fl->fl_flags ^= FL_FLOCK; @@ -684,7 +681,7 @@ v9fs_direct_read(struct file *filp, char __user *udata, size_t count, /** * v9fs_cached_file_read - read from a file * @filp: file pointer to read - * @udata: user data buffer to read data into + * @data: user data buffer to read data into * @count: size of buffer * @offset: offset at which to read data * @@ -701,7 +698,7 @@ v9fs_cached_file_read(struct file *filp, char __user *data, size_t count, /** * v9fs_mmap_file_read - read from a file * @filp: file pointer to read - * @udata: user data buffer to read data into + * @data: user data buffer to read data into * @count: size of buffer * @offset: offset at which to read data * diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 53161ec058a7..7fa4f7a7653d 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -147,7 +147,7 @@ static umode_t p9mode2unixmode(struct v9fs_session_info *v9ses, int major = -1, minor = -1; strlcpy(ext, stat->extension, sizeof(ext)); - sscanf(ext, "%c %u %u", &type, &major, &minor); + sscanf(ext, "%c %i %i", &type, &major, &minor); switch (type) { case 'c': res |= S_IFCHR; @@ -580,7 +580,7 @@ static int v9fs_at_to_dotl_flags(int flags) * v9fs_remove - helper function to remove files and directories * @dir: directory inode that is being deleted * @dentry: dentry that is being deleted - * @rmdir: removing a directory + * @flags: removing a directory * */ @@ -778,7 +778,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode * v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode * @dir: inode that is being walked from * @dentry: dentry that is being walked to? - * @nameidata: path data + * @flags: lookup flags (unused) * */ @@ -1324,7 +1324,7 @@ v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) * v9fs_vfs_mkspecial - create a special file * @dir: inode to create special file in * @dentry: dentry to create - * @mode: mode to create special file + * @perm: mode to create special file * @extension: 9p2000.u format extension string representing special file * */ diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 59dc8e87647f..1fa85aae24df 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -226,7 +226,7 @@ int v9fs_open_to_dotl_flags(int flags) * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol. * @dir: directory inode that is being created * @dentry: dentry that is being deleted - * @mode: create permissions + * @omode: create permissions * */ @@ -375,7 +375,7 @@ err_clunk_old_fid: * v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory * @dir: inode that is being unlinked * @dentry: dentry that is being unlinked - * @mode: mode for new directory + * @omode: mode for new directory * */ @@ -607,7 +607,6 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) * v9fs_stat2inode_dotl - populate an inode structure with stat info * @stat: stat structure * @inode: inode to populate - * @sb: superblock of filesystem * */ @@ -808,7 +807,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, * v9fs_vfs_mknod_dotl - create a special file * @dir: inode destination for new link * @dentry: dentry for file - * @mode: mode for creation + * @omode: mode for creation * @rdev: device associated with special file * */ diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index 04133a1fd9cb..f95e01e058e4 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -156,7 +156,7 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name, offset += write_count; value_len -= write_count; } - retval = offset; + retval = 0; err: p9_client_clunk(fid); return retval; diff --git a/fs/Makefile b/fs/Makefile index f9cb9876e466..4030cbfbc9af 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -14,14 +14,13 @@ obj-y := open.o read_write.o file_table.o super.o \ stack.o fs_struct.o statfs.o ifeq ($(CONFIG_BLOCK),y) -obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o +obj-y += buffer.o block_dev.o direct-io.o mpage.o else obj-y += no-block.o endif obj-$(CONFIG_PROC_FS) += proc_namespace.o -obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o obj-y += notify/ obj-$(CONFIG_EPOLL) += eventpoll.o obj-$(CONFIG_ANON_INODES) += anon_inodes.o diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 25b23b1e7f22..9bca88159725 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -1,3 +1,9 @@ +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/types.h> #include <linux/fs.h> #include <linux/buffer_head.h> @@ -206,7 +212,7 @@ affs_set_blocksize(struct super_block *sb, int size) static inline struct buffer_head * affs_bread(struct super_block *sb, int block) { - pr_debug("affs_bread: %d\n", block); + pr_debug("%s: %d\n", __func__, block); if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) return sb_bread(sb, block); return NULL; @@ -214,7 +220,7 @@ affs_bread(struct super_block *sb, int block) static inline struct buffer_head * affs_getblk(struct super_block *sb, int block) { - pr_debug("affs_getblk: %d\n", block); + pr_debug("%s: %d\n", __func__, block); if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) return sb_getblk(sb, block); return NULL; @@ -223,7 +229,7 @@ static inline struct buffer_head * affs_getzeroblk(struct super_block *sb, int block) { struct buffer_head *bh; - pr_debug("affs_getzeroblk: %d\n", block); + pr_debug("%s: %d\n", __func__, block); if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) { bh = sb_getblk(sb, block); lock_buffer(bh); @@ -238,7 +244,7 @@ static inline struct buffer_head * affs_getemptyblk(struct super_block *sb, int block) { struct buffer_head *bh; - pr_debug("affs_getemptyblk: %d\n", block); + pr_debug("%s: %d\n", __func__, block); if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) { bh = sb_getblk(sb, block); wait_on_buffer(bh); @@ -251,7 +257,7 @@ static inline void affs_brelse(struct buffer_head *bh) { if (bh) - pr_debug("affs_brelse: %lld\n", (long long) bh->b_blocknr); + pr_debug("%s: %lld\n", __func__, (long long) bh->b_blocknr); brelse(bh); } diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 533a322c41c0..406b29836b19 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -34,7 +34,7 @@ affs_insert_hash(struct inode *dir, struct buffer_head *bh) ino = bh->b_blocknr; offset = affs_hash_name(sb, AFFS_TAIL(sb, bh)->name + 1, AFFS_TAIL(sb, bh)->name[0]); - pr_debug("AFFS: insert_hash(dir=%u, ino=%d)\n", (u32)dir->i_ino, ino); + pr_debug("%s(dir=%u, ino=%d)\n", __func__, (u32)dir->i_ino, ino); dir_bh = affs_bread(sb, dir->i_ino); if (!dir_bh) @@ -84,7 +84,8 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh) sb = dir->i_sb; rem_ino = rem_bh->b_blocknr; offset = affs_hash_name(sb, AFFS_TAIL(sb, rem_bh)->name+1, AFFS_TAIL(sb, rem_bh)->name[0]); - pr_debug("AFFS: remove_hash(dir=%d, ino=%d, hashval=%d)\n", (u32)dir->i_ino, rem_ino, offset); + pr_debug("%s(dir=%d, ino=%d, hashval=%d)\n", + __func__, (u32)dir->i_ino, rem_ino, offset); bh = affs_bread(sb, dir->i_ino); if (!bh) @@ -147,7 +148,7 @@ affs_remove_link(struct dentry *dentry) u32 link_ino, ino; int retval; - pr_debug("AFFS: remove_link(key=%ld)\n", inode->i_ino); + pr_debug("%s(key=%ld)\n", __func__, inode->i_ino); retval = -EIO; bh = affs_bread(sb, inode->i_ino); if (!bh) @@ -279,7 +280,7 @@ affs_remove_header(struct dentry *dentry) if (!inode) goto done; - pr_debug("AFFS: remove_header(key=%ld)\n", inode->i_ino); + pr_debug("%s(key=%ld)\n", __func__, inode->i_ino); retval = -EIO; bh = affs_bread(sb, (u32)(long)dentry->d_fsdata); if (!bh) @@ -451,10 +452,10 @@ affs_error(struct super_block *sb, const char *function, const char *fmt, ...) vsnprintf(ErrorBuffer,sizeof(ErrorBuffer),fmt,args); va_end(args); - printk(KERN_CRIT "AFFS error (device %s): %s(): %s\n", sb->s_id, + pr_crit("error (device %s): %s(): %s\n", sb->s_id, function,ErrorBuffer); if (!(sb->s_flags & MS_RDONLY)) - printk(KERN_WARNING "AFFS: Remounting filesystem read-only\n"); + pr_warn("Remounting filesystem read-only\n"); sb->s_flags |= MS_RDONLY; } @@ -467,7 +468,7 @@ affs_warning(struct super_block *sb, const char *function, const char *fmt, ...) vsnprintf(ErrorBuffer,sizeof(ErrorBuffer),fmt,args); va_end(args); - printk(KERN_WARNING "AFFS warning (device %s): %s(): %s\n", sb->s_id, + pr_warn("(device %s): %s(): %s\n", sb->s_id, function,ErrorBuffer); } diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c index a32246b8359e..c8de51185c23 100644 --- a/fs/affs/bitmap.c +++ b/fs/affs/bitmap.c @@ -17,7 +17,7 @@ affs_count_free_blocks(struct super_block *sb) u32 free; int i; - pr_debug("AFFS: count_free_blocks()\n"); + pr_debug("%s()\n", __func__); if (sb->s_flags & MS_RDONLY) return 0; @@ -43,7 +43,7 @@ affs_free_block(struct super_block *sb, u32 block) u32 blk, bmap, bit, mask, tmp; __be32 *data; - pr_debug("AFFS: free_block(%u)\n", block); + pr_debug("%s(%u)\n", __func__, block); if (block > sbi->s_partition_size) goto err_range; @@ -125,7 +125,7 @@ affs_alloc_block(struct inode *inode, u32 goal) sb = inode->i_sb; sbi = AFFS_SB(sb); - pr_debug("AFFS: balloc(inode=%lu,goal=%u): ", inode->i_ino, goal); + pr_debug("balloc(inode=%lu,goal=%u): ", inode->i_ino, goal); if (AFFS_I(inode)->i_pa_cnt) { pr_debug("%d\n", AFFS_I(inode)->i_lastalloc+1); @@ -254,8 +254,7 @@ int affs_init_bitmap(struct super_block *sb, int *flags) return 0; if (!AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag) { - printk(KERN_NOTICE "AFFS: Bitmap invalid - mounting %s read only\n", - sb->s_id); + pr_notice("Bitmap invalid - mounting %s read only\n", sb->s_id); *flags |= MS_RDONLY; return 0; } @@ -268,7 +267,7 @@ int affs_init_bitmap(struct super_block *sb, int *flags) size = sbi->s_bmap_count * sizeof(*bm); bm = sbi->s_bitmap = kzalloc(size, GFP_KERNEL); if (!sbi->s_bitmap) { - printk(KERN_ERR "AFFS: Bitmap allocation failed\n"); + pr_err("Bitmap allocation failed\n"); return -ENOMEM; } @@ -282,17 +281,17 @@ int affs_init_bitmap(struct super_block *sb, int *flags) bm->bm_key = be32_to_cpu(bmap_blk[blk]); bh = affs_bread(sb, bm->bm_key); if (!bh) { - printk(KERN_ERR "AFFS: Cannot read bitmap\n"); + pr_err("Cannot read bitmap\n"); res = -EIO; goto out; } if (affs_checksum_block(sb, bh)) { - printk(KERN_WARNING "AFFS: Bitmap %u invalid - mounting %s read only.\n", - bm->bm_key, sb->s_id); + pr_warn("Bitmap %u invalid - mounting %s read only.\n", + bm->bm_key, sb->s_id); *flags |= MS_RDONLY; goto out; } - pr_debug("AFFS: read bitmap block %d: %d\n", blk, bm->bm_key); + pr_debug("read bitmap block %d: %d\n", blk, bm->bm_key); bm->bm_free = memweight(bh->b_data + 4, sb->s_blocksize - 4); /* Don't try read the extension if this is the last block, @@ -304,7 +303,7 @@ int affs_init_bitmap(struct super_block *sb, int *flags) affs_brelse(bmap_bh); bmap_bh = affs_bread(sb, be32_to_cpu(bmap_blk[blk])); if (!bmap_bh) { - printk(KERN_ERR "AFFS: Cannot read bitmap extension\n"); + pr_err("Cannot read bitmap extension\n"); res = -EIO; goto out; } diff --git a/fs/affs/dir.c b/fs/affs/dir.c index cbbda476a805..59f07bec92a6 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -54,8 +54,8 @@ affs_readdir(struct file *file, struct dir_context *ctx) u32 ino; int error = 0; - pr_debug("AFFS: readdir(ino=%lu,f_pos=%lx)\n", - inode->i_ino, (unsigned long)ctx->pos); + pr_debug("%s(ino=%lu,f_pos=%lx)\n", + __func__, inode->i_ino, (unsigned long)ctx->pos); if (ctx->pos < 2) { file->private_data = (void *)0; @@ -81,7 +81,7 @@ affs_readdir(struct file *file, struct dir_context *ctx) */ ino = (u32)(long)file->private_data; if (ino && file->f_version == inode->i_version) { - pr_debug("AFFS: readdir() left off=%d\n", ino); + pr_debug("readdir() left off=%d\n", ino); goto inside; } @@ -117,7 +117,7 @@ inside: namelen = min(AFFS_TAIL(sb, fh_bh)->name[0], (u8)30); name = AFFS_TAIL(sb, fh_bh)->name + 1; - pr_debug("AFFS: readdir(): dir_emit(\"%.*s\", " + pr_debug("readdir(): dir_emit(\"%.*s\", " "ino=%u), hash=%d, f_pos=%x\n", namelen, name, ino, hash_pos, (u32)ctx->pos); diff --git a/fs/affs/file.c b/fs/affs/file.c index 8669b6ecddee..0270303388ee 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -45,7 +45,7 @@ const struct inode_operations affs_file_inode_operations = { static int affs_file_open(struct inode *inode, struct file *filp) { - pr_debug("AFFS: open(%lu,%d)\n", + pr_debug("open(%lu,%d)\n", inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt)); atomic_inc(&AFFS_I(inode)->i_opencnt); return 0; @@ -54,7 +54,7 @@ affs_file_open(struct inode *inode, struct file *filp) static int affs_file_release(struct inode *inode, struct file *filp) { - pr_debug("AFFS: release(%lu, %d)\n", + pr_debug("release(%lu, %d)\n", inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt)); if (atomic_dec_and_test(&AFFS_I(inode)->i_opencnt)) { @@ -324,7 +324,8 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul struct buffer_head *ext_bh; u32 ext; - pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block); + pr_debug("%s(%u, %lu)\n", + __func__, (u32)inode->i_ino, (unsigned long)block); BUG_ON(block > (sector_t)0x7fffffffUL); @@ -498,34 +499,36 @@ affs_getemptyblk_ino(struct inode *inode, int block) } static int -affs_do_readpage_ofs(struct file *file, struct page *page, unsigned from, unsigned to) +affs_do_readpage_ofs(struct page *page, unsigned to) { struct inode *inode = page->mapping->host; struct super_block *sb = inode->i_sb; struct buffer_head *bh; char *data; + unsigned pos = 0; u32 bidx, boff, bsize; u32 tmp; - pr_debug("AFFS: read_page(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to); - BUG_ON(from > to || to > PAGE_CACHE_SIZE); + pr_debug("%s(%u, %ld, 0, %d)\n", __func__, (u32)inode->i_ino, + page->index, to); + BUG_ON(to > PAGE_CACHE_SIZE); kmap(page); data = page_address(page); bsize = AFFS_SB(sb)->s_data_blksize; - tmp = (page->index << PAGE_CACHE_SHIFT) + from; + tmp = page->index << PAGE_CACHE_SHIFT; bidx = tmp / bsize; boff = tmp % bsize; - while (from < to) { + while (pos < to) { bh = affs_bread_ino(inode, bidx, 0); if (IS_ERR(bh)) return PTR_ERR(bh); - tmp = min(bsize - boff, to - from); - BUG_ON(from + tmp > to || tmp > bsize); - memcpy(data + from, AFFS_DATA(bh) + boff, tmp); + tmp = min(bsize - boff, to - pos); + BUG_ON(pos + tmp > to || tmp > bsize); + memcpy(data + pos, AFFS_DATA(bh) + boff, tmp); affs_brelse(bh); bidx++; - from += tmp; + pos += tmp; boff = 0; } flush_dcache_page(page); @@ -542,7 +545,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize) u32 size, bsize; u32 tmp; - pr_debug("AFFS: extent_file(%u, %d)\n", (u32)inode->i_ino, newsize); + pr_debug("%s(%u, %d)\n", __func__, (u32)inode->i_ino, newsize); bsize = AFFS_SB(sb)->s_data_blksize; bh = NULL; size = AFFS_I(inode)->mmu_private; @@ -608,14 +611,14 @@ affs_readpage_ofs(struct file *file, struct page *page) u32 to; int err; - pr_debug("AFFS: read_page(%u, %ld)\n", (u32)inode->i_ino, page->index); + pr_debug("%s(%u, %ld)\n", __func__, (u32)inode->i_ino, page->index); to = PAGE_CACHE_SIZE; if (((page->index + 1) << PAGE_CACHE_SHIFT) > inode->i_size) { to = inode->i_size & ~PAGE_CACHE_MASK; memset(page_address(page) + to, 0, PAGE_CACHE_SIZE - to); } - err = affs_do_readpage_ofs(file, page, 0, to); + err = affs_do_readpage_ofs(page, to); if (!err) SetPageUptodate(page); unlock_page(page); @@ -631,7 +634,8 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping pgoff_t index; int err = 0; - pr_debug("AFFS: write_begin(%u, %llu, %llu)\n", (u32)inode->i_ino, (unsigned long long)pos, (unsigned long long)pos + len); + pr_debug("%s(%u, %llu, %llu)\n", __func__, (u32)inode->i_ino, + (unsigned long long)pos, (unsigned long long)pos + len); if (pos > AFFS_I(inode)->mmu_private) { /* XXX: this probably leaves a too-big i_size in case of * failure. Should really be updating i_size at write_end time @@ -651,7 +655,7 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping return 0; /* XXX: inefficient but safe in the face of short writes */ - err = affs_do_readpage_ofs(file, page, 0, PAGE_CACHE_SIZE); + err = affs_do_readpage_ofs(page, PAGE_CACHE_SIZE); if (err) { unlock_page(page); page_cache_release(page); @@ -680,7 +684,9 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, * due to write_begin. */ - pr_debug("AFFS: write_begin(%u, %llu, %llu)\n", (u32)inode->i_ino, (unsigned long long)pos, (unsigned long long)pos + len); + pr_debug("%s(%u, %llu, %llu)\n", + __func__, (u32)inode->i_ino, (unsigned long long)pos, + (unsigned long long)pos + len); bsize = AFFS_SB(sb)->s_data_blksize; data = page_address(page); @@ -802,7 +808,7 @@ affs_free_prealloc(struct inode *inode) { struct super_block *sb = inode->i_sb; - pr_debug("AFFS: free_prealloc(ino=%lu)\n", inode->i_ino); + pr_debug("free_prealloc(ino=%lu)\n", inode->i_ino); while (AFFS_I(inode)->i_pa_cnt) { AFFS_I(inode)->i_pa_cnt--; @@ -822,7 +828,7 @@ affs_truncate(struct inode *inode) struct buffer_head *ext_bh; int i; - pr_debug("AFFS: truncate(inode=%d, oldsize=%u, newsize=%u)\n", + pr_debug("truncate(inode=%d, oldsize=%u, newsize=%u)\n", (u32)inode->i_ino, (u32)AFFS_I(inode)->mmu_private, (u32)inode->i_size); last_blk = 0; diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 96df91e8c334..bec2d1a0c91c 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -34,7 +34,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino) if (!(inode->i_state & I_NEW)) return inode; - pr_debug("AFFS: affs_iget(%lu)\n", inode->i_ino); + pr_debug("affs_iget(%lu)\n", inode->i_ino); block = inode->i_ino; bh = affs_bread(sb, block); @@ -175,7 +175,7 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc) uid_t uid; gid_t gid; - pr_debug("AFFS: write_inode(%lu)\n",inode->i_ino); + pr_debug("write_inode(%lu)\n", inode->i_ino); if (!inode->i_nlink) // possibly free block @@ -220,7 +220,7 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr) struct inode *inode = dentry->d_inode; int error; - pr_debug("AFFS: notify_change(%lu,0x%x)\n",inode->i_ino,attr->ia_valid); + pr_debug("notify_change(%lu,0x%x)\n", inode->i_ino, attr->ia_valid); error = inode_change_ok(inode,attr); if (error) @@ -258,7 +258,8 @@ void affs_evict_inode(struct inode *inode) { unsigned long cache_page; - pr_debug("AFFS: evict_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); + pr_debug("evict_inode(ino=%lu, nlink=%u)\n", + inode->i_ino, inode->i_nlink); truncate_inode_pages_final(&inode->i_data); if (!inode->i_nlink) { @@ -271,7 +272,7 @@ affs_evict_inode(struct inode *inode) affs_free_prealloc(inode); cache_page = (unsigned long)AFFS_I(inode)->i_lc; if (cache_page) { - pr_debug("AFFS: freeing ext cache\n"); + pr_debug("freeing ext cache\n"); AFFS_I(inode)->i_lc = NULL; AFFS_I(inode)->i_ac = NULL; free_page(cache_page); @@ -350,7 +351,8 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3 u32 block = 0; int retval; - pr_debug("AFFS: add_entry(dir=%u, inode=%u, \"%*s\", type=%d)\n", (u32)dir->i_ino, + pr_debug("%s(dir=%u, inode=%u, \"%*s\", type=%d)\n", + __func__, (u32)dir->i_ino, (u32)inode->i_ino, (int)dentry->d_name.len, dentry->d_name.name, type); retval = -EIO; diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 6dae1ccd176d..035bd31556fc 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -190,7 +190,8 @@ affs_find_entry(struct inode *dir, struct dentry *dentry) toupper_t toupper = affs_get_toupper(sb); u32 key; - pr_debug("AFFS: find_entry(\"%.*s\")\n", (int)dentry->d_name.len, dentry->d_name.name); + pr_debug("%s(\"%.*s\")\n", + __func__, (int)dentry->d_name.len, dentry->d_name.name); bh = affs_bread(sb, dir->i_ino); if (!bh) @@ -218,7 +219,8 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) struct buffer_head *bh; struct inode *inode = NULL; - pr_debug("AFFS: lookup(\"%.*s\")\n",(int)dentry->d_name.len,dentry->d_name.name); + pr_debug("%s(\"%.*s\")\n", + __func__, (int)dentry->d_name.len, dentry->d_name.name); affs_lock_dir(dir); bh = affs_find_entry(dir, dentry); @@ -248,9 +250,9 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) int affs_unlink(struct inode *dir, struct dentry *dentry) { - pr_debug("AFFS: unlink(dir=%d, %lu \"%.*s\")\n", (u32)dir->i_ino, - dentry->d_inode->i_ino, - (int)dentry->d_name.len, dentry->d_name.name); + pr_debug("%s(dir=%d, %lu \"%.*s\")\n", + __func__, (u32)dir->i_ino, dentry->d_inode->i_ino, + (int)dentry->d_name.len, dentry->d_name.name); return affs_remove_header(dentry); } @@ -262,7 +264,8 @@ affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) struct inode *inode; int error; - pr_debug("AFFS: create(%lu,\"%.*s\",0%ho)\n",dir->i_ino,(int)dentry->d_name.len, + pr_debug("%s(%lu,\"%.*s\",0%ho)\n", + __func__, dir->i_ino, (int)dentry->d_name.len, dentry->d_name.name,mode); inode = affs_new_inode(dir); @@ -291,8 +294,9 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct inode *inode; int error; - pr_debug("AFFS: mkdir(%lu,\"%.*s\",0%ho)\n",dir->i_ino, - (int)dentry->d_name.len,dentry->d_name.name,mode); + pr_debug("%s(%lu,\"%.*s\",0%ho)\n", + __func__, dir->i_ino, (int)dentry->d_name.len, + dentry->d_name.name, mode); inode = affs_new_inode(dir); if (!inode) @@ -317,8 +321,8 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) int affs_rmdir(struct inode *dir, struct dentry *dentry) { - pr_debug("AFFS: rmdir(dir=%u, %lu \"%.*s\")\n", (u32)dir->i_ino, - dentry->d_inode->i_ino, + pr_debug("%s(dir=%u, %lu \"%.*s\")\n", + __func__, (u32)dir->i_ino, dentry->d_inode->i_ino, (int)dentry->d_name.len, dentry->d_name.name); return affs_remove_header(dentry); @@ -334,8 +338,9 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) int i, maxlen, error; char c, lc; - pr_debug("AFFS: symlink(%lu,\"%.*s\" -> \"%s\")\n",dir->i_ino, - (int)dentry->d_name.len,dentry->d_name.name,symname); + pr_debug("%s(%lu,\"%.*s\" -> \"%s\")\n", + __func__, dir->i_ino, (int)dentry->d_name.len, + dentry->d_name.name, symname); maxlen = AFFS_SB(sb)->s_hashsize * sizeof(u32) - 1; inode = affs_new_inode(dir); @@ -404,7 +409,8 @@ affs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct inode *inode = old_dentry->d_inode; - pr_debug("AFFS: link(%u, %u, \"%.*s\")\n", (u32)inode->i_ino, (u32)dir->i_ino, + pr_debug("%s(%u, %u, \"%.*s\")\n", + __func__, (u32)inode->i_ino, (u32)dir->i_ino, (int)dentry->d_name.len,dentry->d_name.name); return affs_add_entry(dir, inode, dentry, ST_LINKFILE); @@ -418,9 +424,10 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry, struct buffer_head *bh = NULL; int retval; - pr_debug("AFFS: rename(old=%u,\"%*s\" to new=%u,\"%*s\")\n", - (u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name, - (u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name); + pr_debug("%s(old=%u,\"%*s\" to new=%u,\"%*s\")\n", + __func__, (u32)old_dir->i_ino, (int)old_dentry->d_name.len, + old_dentry->d_name.name, (u32)new_dir->i_ino, + (int)new_dentry->d_name.len, new_dentry->d_name.name); retval = affs_check_name(new_dentry->d_name.name, new_dentry->d_name.len, diff --git a/fs/affs/super.c b/fs/affs/super.c index 895ac7dc9dbf..51f1a95bff73 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -46,7 +46,7 @@ static void affs_put_super(struct super_block *sb) { struct affs_sb_info *sbi = AFFS_SB(sb); - pr_debug("AFFS: put_super()\n"); + pr_debug("%s()\n", __func__); cancel_delayed_work_sync(&sbi->sb_work); } @@ -220,7 +220,7 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved, return 0; if (n != 512 && n != 1024 && n != 2048 && n != 4096) { - printk ("AFFS: Invalid blocksize (512, 1024, 2048, 4096 allowed)\n"); + pr_warn("Invalid blocksize (512, 1024, 2048, 4096 allowed)\n"); return 0; } *blocksize = n; @@ -285,8 +285,8 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved, /* Silently ignore the quota options */ break; default: - printk("AFFS: Unrecognized mount option \"%s\" " - "or missing value\n", p); + pr_warn("Unrecognized mount option \"%s\" or missing value\n", + p); return 0; } } @@ -319,7 +319,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) save_mount_options(sb, data); - pr_debug("AFFS: read_super(%s)\n",data ? (const char *)data : "no options"); + pr_debug("read_super(%s)\n", data ? (const char *)data : "no options"); sb->s_magic = AFFS_SUPER_MAGIC; sb->s_op = &affs_sops; @@ -339,7 +339,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block, &blocksize,&sbi->s_prefix, sbi->s_volume, &mount_flags)) { - printk(KERN_ERR "AFFS: Error parsing options\n"); + pr_err("Error parsing options\n"); return -EINVAL; } /* N.B. after this point s_prefix must be released */ @@ -356,7 +356,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) */ size = sb->s_bdev->bd_inode->i_size >> 9; - pr_debug("AFFS: initial blocksize=%d, #blocks=%d\n", 512, size); + pr_debug("initial blocksize=%d, #blocks=%d\n", 512, size); affs_set_blocksize(sb, PAGE_SIZE); /* Try to find root block. Its location depends on the block size. */ @@ -371,7 +371,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) sbi->s_root_block = root_block; if (root_block < 0) sbi->s_root_block = (reserved + size - 1) / 2; - pr_debug("AFFS: setting blocksize to %d\n", blocksize); + pr_debug("setting blocksize to %d\n", blocksize); affs_set_blocksize(sb, blocksize); sbi->s_partition_size = size; @@ -386,7 +386,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) * block behind the calculated one. So we check this one, too. */ for (num_bm = 0; num_bm < 2; num_bm++) { - pr_debug("AFFS: Dev %s, trying root=%u, bs=%d, " + pr_debug("Dev %s, trying root=%u, bs=%d, " "size=%d, reserved=%d\n", sb->s_id, sbi->s_root_block + num_bm, @@ -407,8 +407,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) } } if (!silent) - printk(KERN_ERR "AFFS: No valid root block on device %s\n", - sb->s_id); + pr_err("No valid root block on device %s\n", sb->s_id); return -EINVAL; /* N.B. after this point bh must be released */ @@ -420,7 +419,7 @@ got_root: /* Find out which kind of FS we have */ boot_bh = sb_bread(sb, 0); if (!boot_bh) { - printk(KERN_ERR "AFFS: Cannot read boot block\n"); + pr_err("Cannot read boot block\n"); return -EINVAL; } memcpy(sig, boot_bh->b_data, 4); @@ -433,8 +432,7 @@ got_root: */ if ((chksum == FS_DCFFS || chksum == MUFS_DCFFS || chksum == FS_DCOFS || chksum == MUFS_DCOFS) && !(sb->s_flags & MS_RDONLY)) { - printk(KERN_NOTICE "AFFS: Dircache FS - mounting %s read only\n", - sb->s_id); + pr_notice("Dircache FS - mounting %s read only\n", sb->s_id); sb->s_flags |= MS_RDONLY; } switch (chksum) { @@ -468,14 +466,14 @@ got_root: sb->s_flags |= MS_NOEXEC; break; default: - printk(KERN_ERR "AFFS: Unknown filesystem on device %s: %08X\n", - sb->s_id, chksum); + pr_err("Unknown filesystem on device %s: %08X\n", + sb->s_id, chksum); return -EINVAL; } if (mount_flags & SF_VERBOSE) { u8 len = AFFS_ROOT_TAIL(sb, root_bh)->disk_name[0]; - printk(KERN_NOTICE "AFFS: Mounting volume \"%.*s\": Type=%.3s\\%c, Blocksize=%d\n", + pr_notice("Mounting volume \"%.*s\": Type=%.3s\\%c, Blocksize=%d\n", len > 31 ? 31 : len, AFFS_ROOT_TAIL(sb, root_bh)->disk_name + 1, sig, sig[3] + '0', blocksize); @@ -506,11 +504,11 @@ got_root: sb->s_root = d_make_root(root_inode); if (!sb->s_root) { - printk(KERN_ERR "AFFS: Get root inode failed\n"); + pr_err("AFFS: Get root inode failed\n"); return -ENOMEM; } - pr_debug("AFFS: s_flags=%lX\n",sb->s_flags); + pr_debug("s_flags=%lX\n", sb->s_flags); return 0; } @@ -530,7 +528,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) char volume[32]; char *prefix = NULL; - pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data); + pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data); sync_filesystem(sb); *flags |= MS_NODIRATIME; @@ -578,8 +576,9 @@ affs_statfs(struct dentry *dentry, struct kstatfs *buf) int free; u64 id = huge_encode_dev(sb->s_bdev->bd_dev); - pr_debug("AFFS: statfs() partsize=%d, reserved=%d\n",AFFS_SB(sb)->s_partition_size, - AFFS_SB(sb)->s_reserved); + pr_debug("%s() partsize=%d, reserved=%d\n", + __func__, AFFS_SB(sb)->s_partition_size, + AFFS_SB(sb)->s_reserved); free = affs_count_free_blocks(sb); buf->f_type = AFFS_SUPER_MAGIC; diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c index ee00f08c4f53..f39b71c3981e 100644 --- a/fs/affs/symlink.c +++ b/fs/affs/symlink.c @@ -21,7 +21,7 @@ static int affs_symlink_readpage(struct file *file, struct page *page) char c; char lc; - pr_debug("AFFS: follow_link(ino=%lu)\n",inode->i_ino); + pr_debug("follow_link(ino=%lu)\n", inode->i_ino); err = -EIO; bh = affs_bread(inode->i_sb, inode->i_ino); diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 1c8c6cc6de30..4b0eff6da674 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -130,6 +130,15 @@ static void afs_cm_destructor(struct afs_call *call) { _enter(""); + /* Break the callbacks here so that we do it after the final ACK is + * received. The step number here must match the final number in + * afs_deliver_cb_callback(). + */ + if (call->unmarshall == 6) { + ASSERT(call->server && call->count && call->request); + afs_break_callbacks(call->server, call->count, call->request); + } + afs_put_server(call->server); call->server = NULL; kfree(call->buffer); @@ -272,6 +281,16 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, _debug("trailer"); if (skb->len != 0) return -EBADMSG; + + /* Record that the message was unmarshalled successfully so + * that the call destructor can know do the callback breaking + * work, even if the final ACK isn't received. + * + * If the step number changes, then afs_cm_destructor() must be + * updated also. + */ + call->unmarshall++; + case 6: break; } diff --git a/fs/afs/flock.c b/fs/afs/flock.c index a8cf2cff836c..4baf1d2b39e4 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -555,10 +555,6 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl) return -ENOLCK; /* we're simulating flock() locks using posix locks on the server */ - fl->fl_owner = (fl_owner_t) file; - fl->fl_start = 0; - fl->fl_end = OFFSET_MAX; - if (fl->fl_type == F_UNLCK) return afs_do_unlk(file, fl); return afs_do_setlk(file, fl); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index be75b500005d..590b55f46d61 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -75,7 +75,7 @@ struct afs_call { const struct afs_call_type *type; /* type of call */ const struct afs_wait_mode *wait_mode; /* completion wait mode */ wait_queue_head_t waitq; /* processes awaiting completion */ - work_func_t async_workfn; + void (*async_workfn)(struct afs_call *call); /* asynchronous work function */ struct work_struct async_work; /* asynchronous work processor */ struct work_struct work; /* actual work processor */ struct sk_buff_head rx_queue; /* received packets */ diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index ef943df73b8c..03a3beb17004 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -25,7 +25,7 @@ static void afs_wake_up_call_waiter(struct afs_call *); static int afs_wait_for_call_to_complete(struct afs_call *); static void afs_wake_up_async_call(struct afs_call *); static int afs_dont_wait_for_call_to_complete(struct afs_call *); -static void afs_process_async_call(struct work_struct *); +static void afs_process_async_call(struct afs_call *); static void afs_rx_interceptor(struct sock *, unsigned long, struct sk_buff *); static int afs_deliver_cm_op_id(struct afs_call *, struct sk_buff *, bool); @@ -58,6 +58,13 @@ static void afs_collect_incoming_call(struct work_struct *); static struct sk_buff_head afs_incoming_calls; static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call); +static void afs_async_workfn(struct work_struct *work) +{ + struct afs_call *call = container_of(work, struct afs_call, async_work); + + call->async_workfn(call); +} + /* * open an RxRPC socket and bind it to be a server for callback notifications * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT @@ -184,6 +191,28 @@ static void afs_free_call(struct afs_call *call) } /* + * End a call but do not free it + */ +static void afs_end_call_nofree(struct afs_call *call) +{ + if (call->rxcall) { + rxrpc_kernel_end_call(call->rxcall); + call->rxcall = NULL; + } + if (call->type->destructor) + call->type->destructor(call); +} + +/* + * End a call and free it + */ +static void afs_end_call(struct afs_call *call) +{ + afs_end_call_nofree(call); + afs_free_call(call); +} + +/* * allocate a call with flat request and reply buffers */ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type, @@ -326,7 +355,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, atomic_read(&afs_outstanding_calls)); call->wait_mode = wait_mode; - INIT_WORK(&call->async_work, afs_process_async_call); + call->async_workfn = afs_process_async_call; + INIT_WORK(&call->async_work, afs_async_workfn); memset(&srx, 0, sizeof(srx)); srx.srx_family = AF_RXRPC; @@ -383,11 +413,8 @@ error_do_abort: rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT); while ((skb = skb_dequeue(&call->rx_queue))) afs_free_skb(skb); - rxrpc_kernel_end_call(rxcall); - call->rxcall = NULL; error_kill_call: - call->type->destructor(call); - afs_free_call(call); + afs_end_call(call); _leave(" = %d", ret); return ret; } @@ -509,12 +536,8 @@ static void afs_deliver_to_call(struct afs_call *call) if (call->state >= AFS_CALL_COMPLETE) { while ((skb = skb_dequeue(&call->rx_queue))) afs_free_skb(skb); - if (call->incoming) { - rxrpc_kernel_end_call(call->rxcall); - call->rxcall = NULL; - call->type->destructor(call); - afs_free_call(call); - } + if (call->incoming) + afs_end_call(call); } _leave(""); @@ -564,10 +587,7 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) } _debug("call complete"); - rxrpc_kernel_end_call(call->rxcall); - call->rxcall = NULL; - call->type->destructor(call); - afs_free_call(call); + afs_end_call(call); _leave(" = %d", ret); return ret; } @@ -603,11 +623,8 @@ static int afs_dont_wait_for_call_to_complete(struct afs_call *call) /* * delete an asynchronous call */ -static void afs_delete_async_call(struct work_struct *work) +static void afs_delete_async_call(struct afs_call *call) { - struct afs_call *call = - container_of(work, struct afs_call, async_work); - _enter(""); afs_free_call(call); @@ -620,11 +637,8 @@ static void afs_delete_async_call(struct work_struct *work) * - on a multiple-thread workqueue this work item may try to run on several * CPUs at the same time */ -static void afs_process_async_call(struct work_struct *work) +static void afs_process_async_call(struct afs_call *call) { - struct afs_call *call = - container_of(work, struct afs_call, async_work); - _enter(""); if (!skb_queue_empty(&call->rx_queue)) @@ -637,10 +651,7 @@ static void afs_process_async_call(struct work_struct *work) call->reply = NULL; /* kill the call */ - rxrpc_kernel_end_call(call->rxcall); - call->rxcall = NULL; - if (call->type->destructor) - call->type->destructor(call); + afs_end_call_nofree(call); /* we can't just delete the call because the work item may be * queued */ @@ -663,13 +674,6 @@ void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb) call->reply_size += len; } -static void afs_async_workfn(struct work_struct *work) -{ - struct afs_call *call = container_of(work, struct afs_call, async_work); - - call->async_workfn(work); -} - /* * accept the backlog of incoming calls */ @@ -790,10 +794,7 @@ void afs_send_empty_reply(struct afs_call *call) _debug("oom"); rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT); default: - rxrpc_kernel_end_call(call->rxcall); - call->rxcall = NULL; - call->type->destructor(call); - afs_free_call(call); + afs_end_call(call); _leave(" [error]"); return; } @@ -823,17 +824,16 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) call->state = AFS_CALL_AWAIT_ACK; n = rxrpc_kernel_send_data(call->rxcall, &msg, len); if (n >= 0) { + /* Success */ _leave(" [replied]"); return; } + if (n == -ENOMEM) { _debug("oom"); rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT); } - rxrpc_kernel_end_call(call->rxcall); - call->rxcall = NULL; - call->type->destructor(call); - afs_free_call(call); + afs_end_call(call); _leave(" [error]"); } diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 232e03d4780d..5b570b6efa28 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -737,7 +737,7 @@ MODULE_ALIAS_MISCDEV(AUTOFS_MINOR); MODULE_ALIAS("devname:autofs"); /* Register/deregister misc character device */ -int autofs_dev_ioctl_init(void) +int __init autofs_dev_ioctl_init(void) { int r; diff --git a/fs/befs/btree.c b/fs/befs/btree.c index a2cd305a993a..9c7faa8a9288 100644 --- a/fs/befs/btree.c +++ b/fs/befs/btree.c @@ -318,7 +318,7 @@ befs_btree_find(struct super_block *sb, befs_data_stream * ds, * befs_find_key - Search for a key within a node * @sb: Filesystem superblock * @node: Node to find the key within - * @key: Keystring to search for + * @findkey: Keystring to search for * @value: If key is found, the value stored with the key is put here * * finds exact match if one exists, and returns BEFS_BT_MATCH @@ -405,7 +405,7 @@ befs_find_key(struct super_block *sb, befs_btree_node * node, * Heres how it works: Key_no is the index of the key/value pair to * return in keybuf/value. * Bufsize is the size of keybuf (BEFS_NAME_LEN+1 is a good size). Keysize is - * the number of charecters in the key (just a convenience). + * the number of characters in the key (just a convenience). * * Algorithm: * Get the first leafnode of the tree. See if the requested key is in that @@ -502,12 +502,11 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds, "for key of size %d", __func__, bufsize, keylen); brelse(this_node->bh); goto error_alloc; - }; + } - strncpy(keybuf, keystart, keylen); + strlcpy(keybuf, keystart, keylen + 1); *value = fs64_to_cpu(sb, valarray[cur_key]); *keysize = keylen; - keybuf[keylen] = '\0'; befs_debug(sb, "Read [%llu,%d]: Key \"%.*s\", Value %llu", node_off, cur_key, keylen, keybuf, *value); @@ -707,7 +706,7 @@ befs_bt_get_key(struct super_block *sb, befs_btree_node * node, * @key1: pointer to the first key to be compared * @keylen1: length in bytes of key1 * @key2: pointer to the second key to be compared - * @kelen2: length in bytes of key2 + * @keylen2: length in bytes of key2 * * Returns 0 if @key1 and @key2 are equal. * Returns >0 if @key1 is greater. diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c index c467bebd50af..1e8e0b8d8836 100644 --- a/fs/befs/datastream.c +++ b/fs/befs/datastream.c @@ -116,7 +116,7 @@ befs_fblock2brun(struct super_block *sb, befs_data_stream * data, * befs_read_lsmylink - read long symlink from datastream. * @sb: Filesystem superblock * @ds: Datastrem to read from - * @buf: Buffer in which to place long symlink data + * @buff: Buffer in which to place long symlink data * @len: Length of the long symlink in bytes * * Returns the number of bytes read diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index d626756ff721..a16fbd4e8241 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -133,14 +133,6 @@ befs_get_block(struct inode *inode, sector_t block, befs_debug(sb, "---> befs_get_block() for inode %lu, block %ld", (unsigned long)inode->i_ino, (long)block); - - if (block < 0) { - befs_error(sb, "befs_get_block() was asked for a block " - "number less than zero: block %ld in inode %lu", - (long)block, (unsigned long)inode->i_ino); - return -EIO; - } - if (create) { befs_error(sb, "befs_get_block() was asked to write to " "block %ld in inode %lu", (long)block, @@ -396,9 +388,8 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) if (S_ISLNK(inode->i_mode) && !(befs_ino->i_flags & BEFS_LONG_SYMLINK)){ inode->i_size = 0; inode->i_blocks = befs_sb->block_size / VFS_BLOCK_SIZE; - strncpy(befs_ino->i_data.symlink, raw_inode->data.symlink, - BEFS_SYMLINK_LEN - 1); - befs_ino->i_data.symlink[BEFS_SYMLINK_LEN - 1] = '\0'; + strlcpy(befs_ino->i_data.symlink, raw_inode->data.symlink, + BEFS_SYMLINK_LEN); } else { int num_blks; @@ -591,21 +582,21 @@ befs_utf2nls(struct super_block *sb, const char *in, /** * befs_nls2utf - Convert NLS string to utf8 encodeing * @sb: Superblock - * @src: Input string buffer in NLS format - * @srclen: Length of input string in bytes - * @dest: The output string in UTF-8 format - * @destlen: Length of the output buffer + * @in: Input string buffer in NLS format + * @in_len: Length of input string in bytes + * @out: The output string in UTF-8 format + * @out_len: Length of the output buffer * - * Converts input string @src, which is in the format of the loaded NLS map, + * Converts input string @in, which is in the format of the loaded NLS map, * into a utf8 string. * - * The destination string @dest is allocated by this function and the caller is + * The destination string @out is allocated by this function and the caller is * responsible for freeing it with kfree() * - * On return, *@destlen is the length of @dest in bytes. + * On return, *@out_len is the length of @out in bytes. * * On success, the return value is the number of utf8 characters written to - * the output buffer @dest. + * the output buffer @out. * * On Failure, a negative number coresponding to the error code is returned. */ diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index aa3cb626671e..3892c1a23241 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1108,6 +1108,14 @@ static bool always_dump_vma(struct vm_area_struct *vma) /* Any vsyscall mappings? */ if (vma == get_gate_vma(vma->vm_mm)) return true; + + /* + * Assume that all vmas with a .name op should always be dumped. + * If this changes, a new vm_ops field can easily be added. + */ + if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma)) + return true; + /* * arch_vma_name() returns non-NULL for special architecture mappings, * such as vDSO sections. @@ -1686,7 +1694,7 @@ static size_t get_note_info_size(struct elf_note_info *info) static int write_note_info(struct elf_note_info *info, struct coredump_params *cprm) { - bool first = 1; + bool first = true; struct elf_thread_core_info *t = info->thread; do { @@ -1710,7 +1718,7 @@ static int write_note_info(struct elf_note_info *info, !writenote(&t->notes[i], cprm)) return 0; - first = 0; + first = false; t = t->next; } while (t); diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index d50bbe59da1e..f723cd3a455c 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -380,7 +380,7 @@ failed: /****************************************************************************/ -void old_reloc(unsigned long rl) +static void old_reloc(unsigned long rl) { #ifdef DEBUG char *segment[] = { "TEXT", "DATA", "BSS", "*UNKNOWN*" }; diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c deleted file mode 100644 index 1c2ce0c87711..000000000000 --- a/fs/bio-integrity.c +++ /dev/null @@ -1,657 +0,0 @@ -/* - * bio-integrity.c - bio data integrity extensions - * - * Copyright (C) 2007, 2008, 2009 Oracle Corporation - * Written by: Martin K. Petersen <martin.petersen@oracle.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, - * USA. - * - */ - -#include <linux/blkdev.h> -#include <linux/mempool.h> -#include <linux/export.h> -#include <linux/bio.h> -#include <linux/workqueue.h> -#include <linux/slab.h> - -#define BIP_INLINE_VECS 4 - -static struct kmem_cache *bip_slab; -static struct workqueue_struct *kintegrityd_wq; - -/** - * bio_integrity_alloc - Allocate integrity payload and attach it to bio - * @bio: bio to attach integrity metadata to - * @gfp_mask: Memory allocation mask - * @nr_vecs: Number of integrity metadata scatter-gather elements - * - * Description: This function prepares a bio for attaching integrity - * metadata. nr_vecs specifies the maximum number of pages containing - * integrity metadata that can be attached. - */ -struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, - gfp_t gfp_mask, - unsigned int nr_vecs) -{ - struct bio_integrity_payload *bip; - struct bio_set *bs = bio->bi_pool; - unsigned long idx = BIO_POOL_NONE; - unsigned inline_vecs; - - if (!bs) { - bip = kmalloc(sizeof(struct bio_integrity_payload) + - sizeof(struct bio_vec) * nr_vecs, gfp_mask); - inline_vecs = nr_vecs; - } else { - bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); - inline_vecs = BIP_INLINE_VECS; - } - - if (unlikely(!bip)) - return NULL; - - memset(bip, 0, sizeof(*bip)); - - if (nr_vecs > inline_vecs) { - bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx, - bs->bvec_integrity_pool); - if (!bip->bip_vec) - goto err; - } else { - bip->bip_vec = bip->bip_inline_vecs; - } - - bip->bip_slab = idx; - bip->bip_bio = bio; - bio->bi_integrity = bip; - - return bip; -err: - mempool_free(bip, bs->bio_integrity_pool); - return NULL; -} -EXPORT_SYMBOL(bio_integrity_alloc); - -/** - * bio_integrity_free - Free bio integrity payload - * @bio: bio containing bip to be freed - * - * Description: Used to free the integrity portion of a bio. Usually - * called from bio_free(). - */ -void bio_integrity_free(struct bio *bio) -{ - struct bio_integrity_payload *bip = bio->bi_integrity; - struct bio_set *bs = bio->bi_pool; - - if (bip->bip_owns_buf) - kfree(bip->bip_buf); - - if (bs) { - if (bip->bip_slab != BIO_POOL_NONE) - bvec_free(bs->bvec_integrity_pool, bip->bip_vec, - bip->bip_slab); - - mempool_free(bip, bs->bio_integrity_pool); - } else { - kfree(bip); - } - - bio->bi_integrity = NULL; -} -EXPORT_SYMBOL(bio_integrity_free); - -static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip) -{ - if (bip->bip_slab == BIO_POOL_NONE) - return BIP_INLINE_VECS; - - return bvec_nr_vecs(bip->bip_slab); -} - -/** - * bio_integrity_add_page - Attach integrity metadata - * @bio: bio to update - * @page: page containing integrity metadata - * @len: number of bytes of integrity metadata in page - * @offset: start offset within page - * - * Description: Attach a page containing integrity metadata to bio. - */ -int bio_integrity_add_page(struct bio *bio, struct page *page, - unsigned int len, unsigned int offset) -{ - struct bio_integrity_payload *bip = bio->bi_integrity; - struct bio_vec *iv; - - if (bip->bip_vcnt >= bip_integrity_vecs(bip)) { - printk(KERN_ERR "%s: bip_vec full\n", __func__); - return 0; - } - - iv = bip->bip_vec + bip->bip_vcnt; - - iv->bv_page = page; - iv->bv_len = len; - iv->bv_offset = offset; - bip->bip_vcnt++; - - return len; -} -EXPORT_SYMBOL(bio_integrity_add_page); - -static int bdev_integrity_enabled(struct block_device *bdev, int rw) -{ - struct blk_integrity *bi = bdev_get_integrity(bdev); - - if (bi == NULL) - return 0; - - if (rw == READ && bi->verify_fn != NULL && - (bi->flags & INTEGRITY_FLAG_READ)) - return 1; - - if (rw == WRITE && bi->generate_fn != NULL && - (bi->flags & INTEGRITY_FLAG_WRITE)) - return 1; - - return 0; -} - -/** - * bio_integrity_enabled - Check whether integrity can be passed - * @bio: bio to check - * - * Description: Determines whether bio_integrity_prep() can be called - * on this bio or not. bio data direction and target device must be - * set prior to calling. The functions honors the write_generate and - * read_verify flags in sysfs. - */ -int bio_integrity_enabled(struct bio *bio) -{ - if (!bio_is_rw(bio)) - return 0; - - /* Already protected? */ - if (bio_integrity(bio)) - return 0; - - return bdev_integrity_enabled(bio->bi_bdev, bio_data_dir(bio)); -} -EXPORT_SYMBOL(bio_integrity_enabled); - -/** - * bio_integrity_hw_sectors - Convert 512b sectors to hardware ditto - * @bi: blk_integrity profile for device - * @sectors: Number of 512 sectors to convert - * - * Description: The block layer calculates everything in 512 byte - * sectors but integrity metadata is done in terms of the hardware - * sector size of the storage device. Convert the block layer sectors - * to physical sectors. - */ -static inline unsigned int bio_integrity_hw_sectors(struct blk_integrity *bi, - unsigned int sectors) -{ - /* At this point there are only 512b or 4096b DIF/EPP devices */ - if (bi->sector_size == 4096) - return sectors >>= 3; - - return sectors; -} - -static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, - unsigned int sectors) -{ - return bio_integrity_hw_sectors(bi, sectors) * bi->tuple_size; -} - -/** - * bio_integrity_tag_size - Retrieve integrity tag space - * @bio: bio to inspect - * - * Description: Returns the maximum number of tag bytes that can be - * attached to this bio. Filesystems can use this to determine how - * much metadata to attach to an I/O. - */ -unsigned int bio_integrity_tag_size(struct bio *bio) -{ - struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - - BUG_ON(bio->bi_iter.bi_size == 0); - - return bi->tag_size * (bio->bi_iter.bi_size / bi->sector_size); -} -EXPORT_SYMBOL(bio_integrity_tag_size); - -static int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, - int set) -{ - struct bio_integrity_payload *bip = bio->bi_integrity; - struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - unsigned int nr_sectors; - - BUG_ON(bip->bip_buf == NULL); - - if (bi->tag_size == 0) - return -1; - - nr_sectors = bio_integrity_hw_sectors(bi, - DIV_ROUND_UP(len, bi->tag_size)); - - if (nr_sectors * bi->tuple_size > bip->bip_iter.bi_size) { - printk(KERN_ERR "%s: tag too big for bio: %u > %u\n", __func__, - nr_sectors * bi->tuple_size, bip->bip_iter.bi_size); - return -1; - } - - if (set) - bi->set_tag_fn(bip->bip_buf, tag_buf, nr_sectors); - else - bi->get_tag_fn(bip->bip_buf, tag_buf, nr_sectors); - - return 0; -} - -/** - * bio_integrity_set_tag - Attach a tag buffer to a bio - * @bio: bio to attach buffer to - * @tag_buf: Pointer to a buffer containing tag data - * @len: Length of the included buffer - * - * Description: Use this function to tag a bio by leveraging the extra - * space provided by devices formatted with integrity protection. The - * size of the integrity buffer must be <= to the size reported by - * bio_integrity_tag_size(). - */ -int bio_integrity_set_tag(struct bio *bio, void *tag_buf, unsigned int len) -{ - BUG_ON(bio_data_dir(bio) != WRITE); - - return bio_integrity_tag(bio, tag_buf, len, 1); -} -EXPORT_SYMBOL(bio_integrity_set_tag); - -/** - * bio_integrity_get_tag - Retrieve a tag buffer from a bio - * @bio: bio to retrieve buffer from - * @tag_buf: Pointer to a buffer for the tag data - * @len: Length of the target buffer - * - * Description: Use this function to retrieve the tag buffer from a - * completed I/O. The size of the integrity buffer must be <= to the - * size reported by bio_integrity_tag_size(). - */ -int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len) -{ - BUG_ON(bio_data_dir(bio) != READ); - - return bio_integrity_tag(bio, tag_buf, len, 0); -} -EXPORT_SYMBOL(bio_integrity_get_tag); - -/** - * bio_integrity_generate_verify - Generate/verify integrity metadata for a bio - * @bio: bio to generate/verify integrity metadata for - * @operate: operate number, 1 for generate, 0 for verify - */ -static int bio_integrity_generate_verify(struct bio *bio, int operate) -{ - struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - struct blk_integrity_exchg bix; - struct bio_vec *bv; - sector_t sector; - unsigned int sectors, ret = 0, i; - void *prot_buf = bio->bi_integrity->bip_buf; - - if (operate) - sector = bio->bi_iter.bi_sector; - else - sector = bio->bi_integrity->bip_iter.bi_sector; - - bix.disk_name = bio->bi_bdev->bd_disk->disk_name; - bix.sector_size = bi->sector_size; - - bio_for_each_segment_all(bv, bio, i) { - void *kaddr = kmap_atomic(bv->bv_page); - bix.data_buf = kaddr + bv->bv_offset; - bix.data_size = bv->bv_len; - bix.prot_buf = prot_buf; - bix.sector = sector; - - if (operate) - bi->generate_fn(&bix); - else { - ret = bi->verify_fn(&bix); - if (ret) { - kunmap_atomic(kaddr); - return ret; - } - } - - sectors = bv->bv_len / bi->sector_size; - sector += sectors; - prot_buf += sectors * bi->tuple_size; - - kunmap_atomic(kaddr); - } - return ret; -} - -/** - * bio_integrity_generate - Generate integrity metadata for a bio - * @bio: bio to generate integrity metadata for - * - * Description: Generates integrity metadata for a bio by calling the - * block device's generation callback function. The bio must have a - * bip attached with enough room to accommodate the generated - * integrity metadata. - */ -static void bio_integrity_generate(struct bio *bio) -{ - bio_integrity_generate_verify(bio, 1); -} - -static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) -{ - if (bi) - return bi->tuple_size; - - return 0; -} - -/** - * bio_integrity_prep - Prepare bio for integrity I/O - * @bio: bio to prepare - * - * Description: Allocates a buffer for integrity metadata, maps the - * pages and attaches them to a bio. The bio must have data - * direction, target device and start sector set priot to calling. In - * the WRITE case, integrity metadata will be generated using the - * block device's integrity function. In the READ case, the buffer - * will be prepared for DMA and a suitable end_io handler set up. - */ -int bio_integrity_prep(struct bio *bio) -{ - struct bio_integrity_payload *bip; - struct blk_integrity *bi; - struct request_queue *q; - void *buf; - unsigned long start, end; - unsigned int len, nr_pages; - unsigned int bytes, offset, i; - unsigned int sectors; - - bi = bdev_get_integrity(bio->bi_bdev); - q = bdev_get_queue(bio->bi_bdev); - BUG_ON(bi == NULL); - BUG_ON(bio_integrity(bio)); - - sectors = bio_integrity_hw_sectors(bi, bio_sectors(bio)); - - /* Allocate kernel buffer for protection data */ - len = sectors * blk_integrity_tuple_size(bi); - buf = kmalloc(len, GFP_NOIO | q->bounce_gfp); - if (unlikely(buf == NULL)) { - printk(KERN_ERR "could not allocate integrity buffer\n"); - return -ENOMEM; - } - - end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - start = ((unsigned long) buf) >> PAGE_SHIFT; - nr_pages = end - start; - - /* Allocate bio integrity payload and integrity vectors */ - bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages); - if (unlikely(bip == NULL)) { - printk(KERN_ERR "could not allocate data integrity bioset\n"); - kfree(buf); - return -EIO; - } - - bip->bip_owns_buf = 1; - bip->bip_buf = buf; - bip->bip_iter.bi_size = len; - bip->bip_iter.bi_sector = bio->bi_iter.bi_sector; - - /* Map it */ - offset = offset_in_page(buf); - for (i = 0 ; i < nr_pages ; i++) { - int ret; - bytes = PAGE_SIZE - offset; - - if (len <= 0) - break; - - if (bytes > len) - bytes = len; - - ret = bio_integrity_add_page(bio, virt_to_page(buf), - bytes, offset); - - if (ret == 0) - return 0; - - if (ret < bytes) - break; - - buf += bytes; - len -= bytes; - offset = 0; - } - - /* Install custom I/O completion handler if read verify is enabled */ - if (bio_data_dir(bio) == READ) { - bip->bip_end_io = bio->bi_end_io; - bio->bi_end_io = bio_integrity_endio; - } - - /* Auto-generate integrity metadata if this is a write */ - if (bio_data_dir(bio) == WRITE) - bio_integrity_generate(bio); - - return 0; -} -EXPORT_SYMBOL(bio_integrity_prep); - -/** - * bio_integrity_verify - Verify integrity metadata for a bio - * @bio: bio to verify - * - * Description: This function is called to verify the integrity of a - * bio. The data in the bio io_vec is compared to the integrity - * metadata returned by the HBA. - */ -static int bio_integrity_verify(struct bio *bio) -{ - return bio_integrity_generate_verify(bio, 0); -} - -/** - * bio_integrity_verify_fn - Integrity I/O completion worker - * @work: Work struct stored in bio to be verified - * - * Description: This workqueue function is called to complete a READ - * request. The function verifies the transferred integrity metadata - * and then calls the original bio end_io function. - */ -static void bio_integrity_verify_fn(struct work_struct *work) -{ - struct bio_integrity_payload *bip = - container_of(work, struct bio_integrity_payload, bip_work); - struct bio *bio = bip->bip_bio; - int error; - - error = bio_integrity_verify(bio); - - /* Restore original bio completion handler */ - bio->bi_end_io = bip->bip_end_io; - bio_endio_nodec(bio, error); -} - -/** - * bio_integrity_endio - Integrity I/O completion function - * @bio: Protected bio - * @error: Pointer to errno - * - * Description: Completion for integrity I/O - * - * Normally I/O completion is done in interrupt context. However, - * verifying I/O integrity is a time-consuming task which must be run - * in process context. This function postpones completion - * accordingly. - */ -void bio_integrity_endio(struct bio *bio, int error) -{ - struct bio_integrity_payload *bip = bio->bi_integrity; - - BUG_ON(bip->bip_bio != bio); - - /* In case of an I/O error there is no point in verifying the - * integrity metadata. Restore original bio end_io handler - * and run it. - */ - if (error) { - bio->bi_end_io = bip->bip_end_io; - bio_endio(bio, error); - - return; - } - - INIT_WORK(&bip->bip_work, bio_integrity_verify_fn); - queue_work(kintegrityd_wq, &bip->bip_work); -} -EXPORT_SYMBOL(bio_integrity_endio); - -/** - * bio_integrity_advance - Advance integrity vector - * @bio: bio whose integrity vector to update - * @bytes_done: number of data bytes that have been completed - * - * Description: This function calculates how many integrity bytes the - * number of completed data bytes correspond to and advances the - * integrity vector accordingly. - */ -void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) -{ - struct bio_integrity_payload *bip = bio->bi_integrity; - struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9); - - bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes); -} -EXPORT_SYMBOL(bio_integrity_advance); - -/** - * bio_integrity_trim - Trim integrity vector - * @bio: bio whose integrity vector to update - * @offset: offset to first data sector - * @sectors: number of data sectors - * - * Description: Used to trim the integrity vector in a cloned bio. - * The ivec will be advanced corresponding to 'offset' data sectors - * and the length will be truncated corresponding to 'len' data - * sectors. - */ -void bio_integrity_trim(struct bio *bio, unsigned int offset, - unsigned int sectors) -{ - struct bio_integrity_payload *bip = bio->bi_integrity; - struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - - bio_integrity_advance(bio, offset << 9); - bip->bip_iter.bi_size = bio_integrity_bytes(bi, sectors); -} -EXPORT_SYMBOL(bio_integrity_trim); - -/** - * bio_integrity_clone - Callback for cloning bios with integrity metadata - * @bio: New bio - * @bio_src: Original bio - * @gfp_mask: Memory allocation mask - * - * Description: Called to allocate a bip when cloning a bio - */ -int bio_integrity_clone(struct bio *bio, struct bio *bio_src, - gfp_t gfp_mask) -{ - struct bio_integrity_payload *bip_src = bio_src->bi_integrity; - struct bio_integrity_payload *bip; - - BUG_ON(bip_src == NULL); - - bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt); - - if (bip == NULL) - return -EIO; - - memcpy(bip->bip_vec, bip_src->bip_vec, - bip_src->bip_vcnt * sizeof(struct bio_vec)); - - bip->bip_vcnt = bip_src->bip_vcnt; - bip->bip_iter = bip_src->bip_iter; - - return 0; -} -EXPORT_SYMBOL(bio_integrity_clone); - -int bioset_integrity_create(struct bio_set *bs, int pool_size) -{ - if (bs->bio_integrity_pool) - return 0; - - bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab); - if (!bs->bio_integrity_pool) - return -1; - - bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size); - if (!bs->bvec_integrity_pool) { - mempool_destroy(bs->bio_integrity_pool); - return -1; - } - - return 0; -} -EXPORT_SYMBOL(bioset_integrity_create); - -void bioset_integrity_free(struct bio_set *bs) -{ - if (bs->bio_integrity_pool) - mempool_destroy(bs->bio_integrity_pool); - - if (bs->bvec_integrity_pool) - mempool_destroy(bs->bvec_integrity_pool); -} -EXPORT_SYMBOL(bioset_integrity_free); - -void __init bio_integrity_init(void) -{ - /* - * kintegrityd won't block much but may burn a lot of CPU cycles. - * Make it highpri CPU intensive wq with max concurrency of 1. - */ - kintegrityd_wq = alloc_workqueue("kintegrityd", WQ_MEM_RECLAIM | - WQ_HIGHPRI | WQ_CPU_INTENSIVE, 1); - if (!kintegrityd_wq) - panic("Failed to create kintegrityd\n"); - - bip_slab = kmem_cache_create("bio_integrity_payload", - sizeof(struct bio_integrity_payload) + - sizeof(struct bio_vec) * BIP_INLINE_VECS, - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); - if (!bip_slab) - panic("Failed to create slab\n"); -} diff --git a/fs/bio.c b/fs/bio.c deleted file mode 100644 index 6f0362b77806..000000000000 --- a/fs/bio.c +++ /dev/null @@ -1,2037 +0,0 @@ -/* - * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public Licens - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- - * - */ -#include <linux/mm.h> -#include <linux/swap.h> -#include <linux/bio.h> -#include <linux/blkdev.h> -#include <linux/uio.h> -#include <linux/iocontext.h> -#include <linux/slab.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/export.h> -#include <linux/mempool.h> -#include <linux/workqueue.h> -#include <linux/cgroup.h> -#include <scsi/sg.h> /* for struct sg_iovec */ - -#include <trace/events/block.h> - -/* - * Test patch to inline a certain number of bi_io_vec's inside the bio - * itself, to shrink a bio data allocation from two mempool calls to one - */ -#define BIO_INLINE_VECS 4 - -/* - * if you change this list, also change bvec_alloc or things will - * break badly! cannot be bigger than what you can fit into an - * unsigned short - */ -#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) } -static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { - BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES), -}; -#undef BV - -/* - * fs_bio_set is the bio_set containing bio and iovec memory pools used by - * IO code that does not need private memory pools. - */ -struct bio_set *fs_bio_set; -EXPORT_SYMBOL(fs_bio_set); - -/* - * Our slab pool management - */ -struct bio_slab { - struct kmem_cache *slab; - unsigned int slab_ref; - unsigned int slab_size; - char name[8]; -}; -static DEFINE_MUTEX(bio_slab_lock); -static struct bio_slab *bio_slabs; -static unsigned int bio_slab_nr, bio_slab_max; - -static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) -{ - unsigned int sz = sizeof(struct bio) + extra_size; - struct kmem_cache *slab = NULL; - struct bio_slab *bslab, *new_bio_slabs; - unsigned int new_bio_slab_max; - unsigned int i, entry = -1; - - mutex_lock(&bio_slab_lock); - - i = 0; - while (i < bio_slab_nr) { - bslab = &bio_slabs[i]; - - if (!bslab->slab && entry == -1) - entry = i; - else if (bslab->slab_size == sz) { - slab = bslab->slab; - bslab->slab_ref++; - break; - } - i++; - } - - if (slab) - goto out_unlock; - - if (bio_slab_nr == bio_slab_max && entry == -1) { - new_bio_slab_max = bio_slab_max << 1; - new_bio_slabs = krealloc(bio_slabs, - new_bio_slab_max * sizeof(struct bio_slab), - GFP_KERNEL); - if (!new_bio_slabs) - goto out_unlock; - bio_slab_max = new_bio_slab_max; - bio_slabs = new_bio_slabs; - } - if (entry == -1) - entry = bio_slab_nr++; - - bslab = &bio_slabs[entry]; - - snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry); - slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL); - if (!slab) - goto out_unlock; - - bslab->slab = slab; - bslab->slab_ref = 1; - bslab->slab_size = sz; -out_unlock: - mutex_unlock(&bio_slab_lock); - return slab; -} - -static void bio_put_slab(struct bio_set *bs) -{ - struct bio_slab *bslab = NULL; - unsigned int i; - - mutex_lock(&bio_slab_lock); - - for (i = 0; i < bio_slab_nr; i++) { - if (bs->bio_slab == bio_slabs[i].slab) { - bslab = &bio_slabs[i]; - break; - } - } - - if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n")) - goto out; - - WARN_ON(!bslab->slab_ref); - - if (--bslab->slab_ref) - goto out; - - kmem_cache_destroy(bslab->slab); - bslab->slab = NULL; - -out: - mutex_unlock(&bio_slab_lock); -} - -unsigned int bvec_nr_vecs(unsigned short idx) -{ - return bvec_slabs[idx].nr_vecs; -} - -void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx) -{ - BIO_BUG_ON(idx >= BIOVEC_NR_POOLS); - - if (idx == BIOVEC_MAX_IDX) - mempool_free(bv, pool); - else { - struct biovec_slab *bvs = bvec_slabs + idx; - - kmem_cache_free(bvs->slab, bv); - } -} - -struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx, - mempool_t *pool) -{ - struct bio_vec *bvl; - - /* - * see comment near bvec_array define! - */ - switch (nr) { - case 1: - *idx = 0; - break; - case 2 ... 4: - *idx = 1; - break; - case 5 ... 16: - *idx = 2; - break; - case 17 ... 64: - *idx = 3; - break; - case 65 ... 128: - *idx = 4; - break; - case 129 ... BIO_MAX_PAGES: - *idx = 5; - break; - default: - return NULL; - } - - /* - * idx now points to the pool we want to allocate from. only the - * 1-vec entry pool is mempool backed. - */ - if (*idx == BIOVEC_MAX_IDX) { -fallback: - bvl = mempool_alloc(pool, gfp_mask); - } else { - struct biovec_slab *bvs = bvec_slabs + *idx; - gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO); - - /* - * Make this allocation restricted and don't dump info on - * allocation failures, since we'll fallback to the mempool - * in case of failure. - */ - __gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; - - /* - * Try a slab allocation. If this fails and __GFP_WAIT - * is set, retry with the 1-entry mempool - */ - bvl = kmem_cache_alloc(bvs->slab, __gfp_mask); - if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) { - *idx = BIOVEC_MAX_IDX; - goto fallback; - } - } - - return bvl; -} - -static void __bio_free(struct bio *bio) -{ - bio_disassociate_task(bio); - - if (bio_integrity(bio)) - bio_integrity_free(bio); -} - -static void bio_free(struct bio *bio) -{ - struct bio_set *bs = bio->bi_pool; - void *p; - - __bio_free(bio); - - if (bs) { - if (bio_flagged(bio, BIO_OWNS_VEC)) - bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio)); - - /* - * If we have front padding, adjust the bio pointer before freeing - */ - p = bio; - p -= bs->front_pad; - - mempool_free(p, bs->bio_pool); - } else { - /* Bio was allocated by bio_kmalloc() */ - kfree(bio); - } -} - -void bio_init(struct bio *bio) -{ - memset(bio, 0, sizeof(*bio)); - bio->bi_flags = 1 << BIO_UPTODATE; - atomic_set(&bio->bi_remaining, 1); - atomic_set(&bio->bi_cnt, 1); -} -EXPORT_SYMBOL(bio_init); - -/** - * bio_reset - reinitialize a bio - * @bio: bio to reset - * - * Description: - * After calling bio_reset(), @bio will be in the same state as a freshly - * allocated bio returned bio bio_alloc_bioset() - the only fields that are - * preserved are the ones that are initialized by bio_alloc_bioset(). See - * comment in struct bio. - */ -void bio_reset(struct bio *bio) -{ - unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS); - - __bio_free(bio); - - memset(bio, 0, BIO_RESET_BYTES); - bio->bi_flags = flags|(1 << BIO_UPTODATE); - atomic_set(&bio->bi_remaining, 1); -} -EXPORT_SYMBOL(bio_reset); - -static void bio_chain_endio(struct bio *bio, int error) -{ - bio_endio(bio->bi_private, error); - bio_put(bio); -} - -/** - * bio_chain - chain bio completions - * - * The caller won't have a bi_end_io called when @bio completes - instead, - * @parent's bi_end_io won't be called until both @parent and @bio have - * completed; the chained bio will also be freed when it completes. - * - * The caller must not set bi_private or bi_end_io in @bio. - */ -void bio_chain(struct bio *bio, struct bio *parent) -{ - BUG_ON(bio->bi_private || bio->bi_end_io); - - bio->bi_private = parent; - bio->bi_end_io = bio_chain_endio; - atomic_inc(&parent->bi_remaining); -} -EXPORT_SYMBOL(bio_chain); - -static void bio_alloc_rescue(struct work_struct *work) -{ - struct bio_set *bs = container_of(work, struct bio_set, rescue_work); - struct bio *bio; - - while (1) { - spin_lock(&bs->rescue_lock); - bio = bio_list_pop(&bs->rescue_list); - spin_unlock(&bs->rescue_lock); - - if (!bio) - break; - - generic_make_request(bio); - } -} - -static void punt_bios_to_rescuer(struct bio_set *bs) -{ - struct bio_list punt, nopunt; - struct bio *bio; - - /* - * In order to guarantee forward progress we must punt only bios that - * were allocated from this bio_set; otherwise, if there was a bio on - * there for a stacking driver higher up in the stack, processing it - * could require allocating bios from this bio_set, and doing that from - * our own rescuer would be bad. - * - * Since bio lists are singly linked, pop them all instead of trying to - * remove from the middle of the list: - */ - - bio_list_init(&punt); - bio_list_init(&nopunt); - - while ((bio = bio_list_pop(current->bio_list))) - bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); - - *current->bio_list = nopunt; - - spin_lock(&bs->rescue_lock); - bio_list_merge(&bs->rescue_list, &punt); - spin_unlock(&bs->rescue_lock); - - queue_work(bs->rescue_workqueue, &bs->rescue_work); -} - -/** - * bio_alloc_bioset - allocate a bio for I/O - * @gfp_mask: the GFP_ mask given to the slab allocator - * @nr_iovecs: number of iovecs to pre-allocate - * @bs: the bio_set to allocate from. - * - * Description: - * If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is - * backed by the @bs's mempool. - * - * When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be - * able to allocate a bio. This is due to the mempool guarantees. To make this - * work, callers must never allocate more than 1 bio at a time from this pool. - * Callers that need to allocate more than 1 bio must always submit the - * previously allocated bio for IO before attempting to allocate a new one. - * Failure to do so can cause deadlocks under memory pressure. - * - * Note that when running under generic_make_request() (i.e. any block - * driver), bios are not submitted until after you return - see the code in - * generic_make_request() that converts recursion into iteration, to prevent - * stack overflows. - * - * This would normally mean allocating multiple bios under - * generic_make_request() would be susceptible to deadlocks, but we have - * deadlock avoidance code that resubmits any blocked bios from a rescuer - * thread. - * - * However, we do not guarantee forward progress for allocations from other - * mempools. Doing multiple allocations from the same mempool under - * generic_make_request() should be avoided - instead, use bio_set's front_pad - * for per bio allocations. - * - * RETURNS: - * Pointer to new bio on success, NULL on failure. - */ -struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) -{ - gfp_t saved_gfp = gfp_mask; - unsigned front_pad; - unsigned inline_vecs; - unsigned long idx = BIO_POOL_NONE; - struct bio_vec *bvl = NULL; - struct bio *bio; - void *p; - - if (!bs) { - if (nr_iovecs > UIO_MAXIOV) - return NULL; - - p = kmalloc(sizeof(struct bio) + - nr_iovecs * sizeof(struct bio_vec), - gfp_mask); - front_pad = 0; - inline_vecs = nr_iovecs; - } else { - /* - * generic_make_request() converts recursion to iteration; this - * means if we're running beneath it, any bios we allocate and - * submit will not be submitted (and thus freed) until after we - * return. - * - * This exposes us to a potential deadlock if we allocate - * multiple bios from the same bio_set() while running - * underneath generic_make_request(). If we were to allocate - * multiple bios (say a stacking block driver that was splitting - * bios), we would deadlock if we exhausted the mempool's - * reserve. - * - * We solve this, and guarantee forward progress, with a rescuer - * workqueue per bio_set. If we go to allocate and there are - * bios on current->bio_list, we first try the allocation - * without __GFP_WAIT; if that fails, we punt those bios we - * would be blocking to the rescuer workqueue before we retry - * with the original gfp_flags. - */ - - if (current->bio_list && !bio_list_empty(current->bio_list)) - gfp_mask &= ~__GFP_WAIT; - - p = mempool_alloc(bs->bio_pool, gfp_mask); - if (!p && gfp_mask != saved_gfp) { - punt_bios_to_rescuer(bs); - gfp_mask = saved_gfp; - p = mempool_alloc(bs->bio_pool, gfp_mask); - } - - front_pad = bs->front_pad; - inline_vecs = BIO_INLINE_VECS; - } - - if (unlikely(!p)) - return NULL; - - bio = p + front_pad; - bio_init(bio); - - if (nr_iovecs > inline_vecs) { - bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool); - if (!bvl && gfp_mask != saved_gfp) { - punt_bios_to_rescuer(bs); - gfp_mask = saved_gfp; - bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool); - } - - if (unlikely(!bvl)) - goto err_free; - - bio->bi_flags |= 1 << BIO_OWNS_VEC; - } else if (nr_iovecs) { - bvl = bio->bi_inline_vecs; - } - - bio->bi_pool = bs; - bio->bi_flags |= idx << BIO_POOL_OFFSET; - bio->bi_max_vecs = nr_iovecs; - bio->bi_io_vec = bvl; - return bio; - -err_free: - mempool_free(p, bs->bio_pool); - return NULL; -} -EXPORT_SYMBOL(bio_alloc_bioset); - -void zero_fill_bio(struct bio *bio) -{ - unsigned long flags; - struct bio_vec bv; - struct bvec_iter iter; - - bio_for_each_segment(bv, bio, iter) { - char *data = bvec_kmap_irq(&bv, &flags); - memset(data, 0, bv.bv_len); - flush_dcache_page(bv.bv_page); - bvec_kunmap_irq(data, &flags); - } -} -EXPORT_SYMBOL(zero_fill_bio); - -/** - * bio_put - release a reference to a bio - * @bio: bio to release reference to - * - * Description: - * Put a reference to a &struct bio, either one you have gotten with - * bio_alloc, bio_get or bio_clone. The last put of a bio will free it. - **/ -void bio_put(struct bio *bio) -{ - BIO_BUG_ON(!atomic_read(&bio->bi_cnt)); - - /* - * last put frees it - */ - if (atomic_dec_and_test(&bio->bi_cnt)) - bio_free(bio); -} -EXPORT_SYMBOL(bio_put); - -inline int bio_phys_segments(struct request_queue *q, struct bio *bio) -{ - if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) - blk_recount_segments(q, bio); - - return bio->bi_phys_segments; -} -EXPORT_SYMBOL(bio_phys_segments); - -/** - * __bio_clone_fast - clone a bio that shares the original bio's biovec - * @bio: destination bio - * @bio_src: bio to clone - * - * Clone a &bio. Caller will own the returned bio, but not - * the actual data it points to. Reference count of returned - * bio will be one. - * - * Caller must ensure that @bio_src is not freed before @bio. - */ -void __bio_clone_fast(struct bio *bio, struct bio *bio_src) -{ - BUG_ON(bio->bi_pool && BIO_POOL_IDX(bio) != BIO_POOL_NONE); - - /* - * most users will be overriding ->bi_bdev with a new target, - * so we don't set nor calculate new physical/hw segment counts here - */ - bio->bi_bdev = bio_src->bi_bdev; - bio->bi_flags |= 1 << BIO_CLONED; - bio->bi_rw = bio_src->bi_rw; - bio->bi_iter = bio_src->bi_iter; - bio->bi_io_vec = bio_src->bi_io_vec; -} -EXPORT_SYMBOL(__bio_clone_fast); - -/** - * bio_clone_fast - clone a bio that shares the original bio's biovec - * @bio: bio to clone - * @gfp_mask: allocation priority - * @bs: bio_set to allocate from - * - * Like __bio_clone_fast, only also allocates the returned bio - */ -struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs) -{ - struct bio *b; - - b = bio_alloc_bioset(gfp_mask, 0, bs); - if (!b) - return NULL; - - __bio_clone_fast(b, bio); - - if (bio_integrity(bio)) { - int ret; - - ret = bio_integrity_clone(b, bio, gfp_mask); - - if (ret < 0) { - bio_put(b); - return NULL; - } - } - - return b; -} -EXPORT_SYMBOL(bio_clone_fast); - -/** - * bio_clone_bioset - clone a bio - * @bio_src: bio to clone - * @gfp_mask: allocation priority - * @bs: bio_set to allocate from - * - * Clone bio. Caller will own the returned bio, but not the actual data it - * points to. Reference count of returned bio will be one. - */ -struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, - struct bio_set *bs) -{ - struct bvec_iter iter; - struct bio_vec bv; - struct bio *bio; - - /* - * Pre immutable biovecs, __bio_clone() used to just do a memcpy from - * bio_src->bi_io_vec to bio->bi_io_vec. - * - * We can't do that anymore, because: - * - * - The point of cloning the biovec is to produce a bio with a biovec - * the caller can modify: bi_idx and bi_bvec_done should be 0. - * - * - The original bio could've had more than BIO_MAX_PAGES biovecs; if - * we tried to clone the whole thing bio_alloc_bioset() would fail. - * But the clone should succeed as long as the number of biovecs we - * actually need to allocate is fewer than BIO_MAX_PAGES. - * - * - Lastly, bi_vcnt should not be looked at or relied upon by code - * that does not own the bio - reason being drivers don't use it for - * iterating over the biovec anymore, so expecting it to be kept up - * to date (i.e. for clones that share the parent biovec) is just - * asking for trouble and would force extra work on - * __bio_clone_fast() anyways. - */ - - bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs); - if (!bio) - return NULL; - - bio->bi_bdev = bio_src->bi_bdev; - bio->bi_rw = bio_src->bi_rw; - bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; - bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; - - if (bio->bi_rw & REQ_DISCARD) - goto integrity_clone; - - if (bio->bi_rw & REQ_WRITE_SAME) { - bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0]; - goto integrity_clone; - } - - bio_for_each_segment(bv, bio_src, iter) - bio->bi_io_vec[bio->bi_vcnt++] = bv; - -integrity_clone: - if (bio_integrity(bio_src)) { - int ret; - - ret = bio_integrity_clone(bio, bio_src, gfp_mask); - if (ret < 0) { - bio_put(bio); - return NULL; - } - } - - return bio; -} -EXPORT_SYMBOL(bio_clone_bioset); - -/** - * bio_get_nr_vecs - return approx number of vecs - * @bdev: I/O target - * - * Return the approximate number of pages we can send to this target. - * There's no guarantee that you will be able to fit this number of pages - * into a bio, it does not account for dynamic restrictions that vary - * on offset. - */ -int bio_get_nr_vecs(struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - int nr_pages; - - nr_pages = min_t(unsigned, - queue_max_segments(q), - queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1); - - return min_t(unsigned, nr_pages, BIO_MAX_PAGES); - -} -EXPORT_SYMBOL(bio_get_nr_vecs); - -static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page - *page, unsigned int len, unsigned int offset, - unsigned int max_sectors) -{ - int retried_segments = 0; - struct bio_vec *bvec; - - /* - * cloned bio must not modify vec list - */ - if (unlikely(bio_flagged(bio, BIO_CLONED))) - return 0; - - if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors) - return 0; - - /* - * For filesystems with a blocksize smaller than the pagesize - * we will often be called with the same page as last time and - * a consecutive offset. Optimize this special case. - */ - if (bio->bi_vcnt > 0) { - struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; - - if (page == prev->bv_page && - offset == prev->bv_offset + prev->bv_len) { - unsigned int prev_bv_len = prev->bv_len; - prev->bv_len += len; - - if (q->merge_bvec_fn) { - struct bvec_merge_data bvm = { - /* prev_bvec is already charged in - bi_size, discharge it in order to - simulate merging updated prev_bvec - as new bvec. */ - .bi_bdev = bio->bi_bdev, - .bi_sector = bio->bi_iter.bi_sector, - .bi_size = bio->bi_iter.bi_size - - prev_bv_len, - .bi_rw = bio->bi_rw, - }; - - if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) { - prev->bv_len -= len; - return 0; - } - } - - goto done; - } - } - - if (bio->bi_vcnt >= bio->bi_max_vecs) - return 0; - - /* - * we might lose a segment or two here, but rather that than - * make this too complex. - */ - - while (bio->bi_phys_segments >= queue_max_segments(q)) { - - if (retried_segments) - return 0; - - retried_segments = 1; - blk_recount_segments(q, bio); - } - - /* - * setup the new entry, we might clear it again later if we - * cannot add the page - */ - bvec = &bio->bi_io_vec[bio->bi_vcnt]; - bvec->bv_page = page; - bvec->bv_len = len; - bvec->bv_offset = offset; - - /* - * if queue has other restrictions (eg varying max sector size - * depending on offset), it can specify a merge_bvec_fn in the - * queue to get further control - */ - if (q->merge_bvec_fn) { - struct bvec_merge_data bvm = { - .bi_bdev = bio->bi_bdev, - .bi_sector = bio->bi_iter.bi_sector, - .bi_size = bio->bi_iter.bi_size, - .bi_rw = bio->bi_rw, - }; - - /* - * merge_bvec_fn() returns number of bytes it can accept - * at this offset - */ - if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) { - bvec->bv_page = NULL; - bvec->bv_len = 0; - bvec->bv_offset = 0; - return 0; - } - } - - /* If we may be able to merge these biovecs, force a recount */ - if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) - bio->bi_flags &= ~(1 << BIO_SEG_VALID); - - bio->bi_vcnt++; - bio->bi_phys_segments++; - done: - bio->bi_iter.bi_size += len; - return len; -} - -/** - * bio_add_pc_page - attempt to add page to bio - * @q: the target queue - * @bio: destination bio - * @page: page to add - * @len: vec entry length - * @offset: vec entry offset - * - * Attempt to add a page to the bio_vec maplist. This can fail for a - * number of reasons, such as the bio being full or target block device - * limitations. The target block device must allow bio's up to PAGE_SIZE, - * so it is always possible to add a single page to an empty bio. - * - * This should only be used by REQ_PC bios. - */ -int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page, - unsigned int len, unsigned int offset) -{ - return __bio_add_page(q, bio, page, len, offset, - queue_max_hw_sectors(q)); -} -EXPORT_SYMBOL(bio_add_pc_page); - -/** - * bio_add_page - attempt to add page to bio - * @bio: destination bio - * @page: page to add - * @len: vec entry length - * @offset: vec entry offset - * - * Attempt to add a page to the bio_vec maplist. This can fail for a - * number of reasons, such as the bio being full or target block device - * limitations. The target block device must allow bio's up to PAGE_SIZE, - * so it is always possible to add a single page to an empty bio. - */ -int bio_add_page(struct bio *bio, struct page *page, unsigned int len, - unsigned int offset) -{ - struct request_queue *q = bdev_get_queue(bio->bi_bdev); - return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q)); -} -EXPORT_SYMBOL(bio_add_page); - -struct submit_bio_ret { - struct completion event; - int error; -}; - -static void submit_bio_wait_endio(struct bio *bio, int error) -{ - struct submit_bio_ret *ret = bio->bi_private; - - ret->error = error; - complete(&ret->event); -} - -/** - * submit_bio_wait - submit a bio, and wait until it completes - * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) - * @bio: The &struct bio which describes the I/O - * - * Simple wrapper around submit_bio(). Returns 0 on success, or the error from - * bio_endio() on failure. - */ -int submit_bio_wait(int rw, struct bio *bio) -{ - struct submit_bio_ret ret; - - rw |= REQ_SYNC; - init_completion(&ret.event); - bio->bi_private = &ret; - bio->bi_end_io = submit_bio_wait_endio; - submit_bio(rw, bio); - wait_for_completion(&ret.event); - - return ret.error; -} -EXPORT_SYMBOL(submit_bio_wait); - -/** - * bio_advance - increment/complete a bio by some number of bytes - * @bio: bio to advance - * @bytes: number of bytes to complete - * - * This updates bi_sector, bi_size and bi_idx; if the number of bytes to - * complete doesn't align with a bvec boundary, then bv_len and bv_offset will - * be updated on the last bvec as well. - * - * @bio will then represent the remaining, uncompleted portion of the io. - */ -void bio_advance(struct bio *bio, unsigned bytes) -{ - if (bio_integrity(bio)) - bio_integrity_advance(bio, bytes); - - bio_advance_iter(bio, &bio->bi_iter, bytes); -} -EXPORT_SYMBOL(bio_advance); - -/** - * bio_alloc_pages - allocates a single page for each bvec in a bio - * @bio: bio to allocate pages for - * @gfp_mask: flags for allocation - * - * Allocates pages up to @bio->bi_vcnt. - * - * Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are - * freed. - */ -int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask) -{ - int i; - struct bio_vec *bv; - - bio_for_each_segment_all(bv, bio, i) { - bv->bv_page = alloc_page(gfp_mask); - if (!bv->bv_page) { - while (--bv >= bio->bi_io_vec) - __free_page(bv->bv_page); - return -ENOMEM; - } - } - - return 0; -} -EXPORT_SYMBOL(bio_alloc_pages); - -/** - * bio_copy_data - copy contents of data buffers from one chain of bios to - * another - * @src: source bio list - * @dst: destination bio list - * - * If @src and @dst are single bios, bi_next must be NULL - otherwise, treats - * @src and @dst as linked lists of bios. - * - * Stops when it reaches the end of either @src or @dst - that is, copies - * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios). - */ -void bio_copy_data(struct bio *dst, struct bio *src) -{ - struct bvec_iter src_iter, dst_iter; - struct bio_vec src_bv, dst_bv; - void *src_p, *dst_p; - unsigned bytes; - - src_iter = src->bi_iter; - dst_iter = dst->bi_iter; - - while (1) { - if (!src_iter.bi_size) { - src = src->bi_next; - if (!src) - break; - - src_iter = src->bi_iter; - } - - if (!dst_iter.bi_size) { - dst = dst->bi_next; - if (!dst) - break; - - dst_iter = dst->bi_iter; - } - - src_bv = bio_iter_iovec(src, src_iter); - dst_bv = bio_iter_iovec(dst, dst_iter); - - bytes = min(src_bv.bv_len, dst_bv.bv_len); - - src_p = kmap_atomic(src_bv.bv_page); - dst_p = kmap_atomic(dst_bv.bv_page); - - memcpy(dst_p + dst_bv.bv_offset, - src_p + src_bv.bv_offset, - bytes); - - kunmap_atomic(dst_p); - kunmap_atomic(src_p); - - bio_advance_iter(src, &src_iter, bytes); - bio_advance_iter(dst, &dst_iter, bytes); - } -} -EXPORT_SYMBOL(bio_copy_data); - -struct bio_map_data { - int nr_sgvecs; - int is_our_pages; - struct sg_iovec sgvecs[]; -}; - -static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, - const struct sg_iovec *iov, int iov_count, - int is_our_pages) -{ - memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); - bmd->nr_sgvecs = iov_count; - bmd->is_our_pages = is_our_pages; - bio->bi_private = bmd; -} - -static struct bio_map_data *bio_alloc_map_data(int nr_segs, - unsigned int iov_count, - gfp_t gfp_mask) -{ - if (iov_count > UIO_MAXIOV) - return NULL; - - return kmalloc(sizeof(struct bio_map_data) + - sizeof(struct sg_iovec) * iov_count, gfp_mask); -} - -static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_count, - int to_user, int from_user, int do_free_page) -{ - int ret = 0, i; - struct bio_vec *bvec; - int iov_idx = 0; - unsigned int iov_off = 0; - - bio_for_each_segment_all(bvec, bio, i) { - char *bv_addr = page_address(bvec->bv_page); - unsigned int bv_len = bvec->bv_len; - - while (bv_len && iov_idx < iov_count) { - unsigned int bytes; - char __user *iov_addr; - - bytes = min_t(unsigned int, - iov[iov_idx].iov_len - iov_off, bv_len); - iov_addr = iov[iov_idx].iov_base + iov_off; - - if (!ret) { - if (to_user) - ret = copy_to_user(iov_addr, bv_addr, - bytes); - - if (from_user) - ret = copy_from_user(bv_addr, iov_addr, - bytes); - - if (ret) - ret = -EFAULT; - } - - bv_len -= bytes; - bv_addr += bytes; - iov_addr += bytes; - iov_off += bytes; - - if (iov[iov_idx].iov_len == iov_off) { - iov_idx++; - iov_off = 0; - } - } - - if (do_free_page) - __free_page(bvec->bv_page); - } - - return ret; -} - -/** - * bio_uncopy_user - finish previously mapped bio - * @bio: bio being terminated - * - * Free pages allocated from bio_copy_user() and write back data - * to user space in case of a read. - */ -int bio_uncopy_user(struct bio *bio) -{ - struct bio_map_data *bmd = bio->bi_private; - struct bio_vec *bvec; - int ret = 0, i; - - if (!bio_flagged(bio, BIO_NULL_MAPPED)) { - /* - * if we're in a workqueue, the request is orphaned, so - * don't copy into a random user address space, just free. - */ - if (current->mm) - ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, - bio_data_dir(bio) == READ, - 0, bmd->is_our_pages); - else if (bmd->is_our_pages) - bio_for_each_segment_all(bvec, bio, i) - __free_page(bvec->bv_page); - } - kfree(bmd); - bio_put(bio); - return ret; -} -EXPORT_SYMBOL(bio_uncopy_user); - -/** - * bio_copy_user_iov - copy user data to bio - * @q: destination block queue - * @map_data: pointer to the rq_map_data holding pages (if necessary) - * @iov: the iovec. - * @iov_count: number of elements in the iovec - * @write_to_vm: bool indicating writing to pages or not - * @gfp_mask: memory allocation flags - * - * Prepares and returns a bio for indirect user io, bouncing data - * to/from kernel pages as necessary. Must be paired with - * call bio_uncopy_user() on io completion. - */ -struct bio *bio_copy_user_iov(struct request_queue *q, - struct rq_map_data *map_data, - const struct sg_iovec *iov, int iov_count, - int write_to_vm, gfp_t gfp_mask) -{ - struct bio_map_data *bmd; - struct bio_vec *bvec; - struct page *page; - struct bio *bio; - int i, ret; - int nr_pages = 0; - unsigned int len = 0; - unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0; - - for (i = 0; i < iov_count; i++) { - unsigned long uaddr; - unsigned long end; - unsigned long start; - - uaddr = (unsigned long)iov[i].iov_base; - end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT; - start = uaddr >> PAGE_SHIFT; - - /* - * Overflow, abort - */ - if (end < start) - return ERR_PTR(-EINVAL); - - nr_pages += end - start; - len += iov[i].iov_len; - } - - if (offset) - nr_pages++; - - bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask); - if (!bmd) - return ERR_PTR(-ENOMEM); - - ret = -ENOMEM; - bio = bio_kmalloc(gfp_mask, nr_pages); - if (!bio) - goto out_bmd; - - if (!write_to_vm) - bio->bi_rw |= REQ_WRITE; - - ret = 0; - - if (map_data) { - nr_pages = 1 << map_data->page_order; - i = map_data->offset / PAGE_SIZE; - } - while (len) { - unsigned int bytes = PAGE_SIZE; - - bytes -= offset; - - if (bytes > len) - bytes = len; - - if (map_data) { - if (i == map_data->nr_entries * nr_pages) { - ret = -ENOMEM; - break; - } - - page = map_data->pages[i / nr_pages]; - page += (i % nr_pages); - - i++; - } else { - page = alloc_page(q->bounce_gfp | gfp_mask); - if (!page) { - ret = -ENOMEM; - break; - } - } - - if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) - break; - - len -= bytes; - offset = 0; - } - - if (ret) - goto cleanup; - - /* - * success - */ - if ((!write_to_vm && (!map_data || !map_data->null_mapped)) || - (map_data && map_data->from_user)) { - ret = __bio_copy_iov(bio, iov, iov_count, 0, 1, 0); - if (ret) - goto cleanup; - } - - bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1); - return bio; -cleanup: - if (!map_data) - bio_for_each_segment_all(bvec, bio, i) - __free_page(bvec->bv_page); - - bio_put(bio); -out_bmd: - kfree(bmd); - return ERR_PTR(ret); -} - -/** - * bio_copy_user - copy user data to bio - * @q: destination block queue - * @map_data: pointer to the rq_map_data holding pages (if necessary) - * @uaddr: start of user address - * @len: length in bytes - * @write_to_vm: bool indicating writing to pages or not - * @gfp_mask: memory allocation flags - * - * Prepares and returns a bio for indirect user io, bouncing data - * to/from kernel pages as necessary. Must be paired with - * call bio_uncopy_user() on io completion. - */ -struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data, - unsigned long uaddr, unsigned int len, - int write_to_vm, gfp_t gfp_mask) -{ - struct sg_iovec iov; - - iov.iov_base = (void __user *)uaddr; - iov.iov_len = len; - - return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask); -} -EXPORT_SYMBOL(bio_copy_user); - -static struct bio *__bio_map_user_iov(struct request_queue *q, - struct block_device *bdev, - const struct sg_iovec *iov, int iov_count, - int write_to_vm, gfp_t gfp_mask) -{ - int i, j; - int nr_pages = 0; - struct page **pages; - struct bio *bio; - int cur_page = 0; - int ret, offset; - - for (i = 0; i < iov_count; i++) { - unsigned long uaddr = (unsigned long)iov[i].iov_base; - unsigned long len = iov[i].iov_len; - unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = uaddr >> PAGE_SHIFT; - - /* - * Overflow, abort - */ - if (end < start) - return ERR_PTR(-EINVAL); - - nr_pages += end - start; - /* - * buffer must be aligned to at least hardsector size for now - */ - if (uaddr & queue_dma_alignment(q)) - return ERR_PTR(-EINVAL); - } - - if (!nr_pages) - return ERR_PTR(-EINVAL); - - bio = bio_kmalloc(gfp_mask, nr_pages); - if (!bio) - return ERR_PTR(-ENOMEM); - - ret = -ENOMEM; - pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask); - if (!pages) - goto out; - - for (i = 0; i < iov_count; i++) { - unsigned long uaddr = (unsigned long)iov[i].iov_base; - unsigned long len = iov[i].iov_len; - unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = uaddr >> PAGE_SHIFT; - const int local_nr_pages = end - start; - const int page_limit = cur_page + local_nr_pages; - - ret = get_user_pages_fast(uaddr, local_nr_pages, - write_to_vm, &pages[cur_page]); - if (ret < local_nr_pages) { - ret = -EFAULT; - goto out_unmap; - } - - offset = uaddr & ~PAGE_MASK; - for (j = cur_page; j < page_limit; j++) { - unsigned int bytes = PAGE_SIZE - offset; - - if (len <= 0) - break; - - if (bytes > len) - bytes = len; - - /* - * sorry... - */ - if (bio_add_pc_page(q, bio, pages[j], bytes, offset) < - bytes) - break; - - len -= bytes; - offset = 0; - } - - cur_page = j; - /* - * release the pages we didn't map into the bio, if any - */ - while (j < page_limit) - page_cache_release(pages[j++]); - } - - kfree(pages); - - /* - * set data direction, and check if mapped pages need bouncing - */ - if (!write_to_vm) - bio->bi_rw |= REQ_WRITE; - - bio->bi_bdev = bdev; - bio->bi_flags |= (1 << BIO_USER_MAPPED); - return bio; - - out_unmap: - for (i = 0; i < nr_pages; i++) { - if(!pages[i]) - break; - page_cache_release(pages[i]); - } - out: - kfree(pages); - bio_put(bio); - return ERR_PTR(ret); -} - -/** - * bio_map_user - map user address into bio - * @q: the struct request_queue for the bio - * @bdev: destination block device - * @uaddr: start of user address - * @len: length in bytes - * @write_to_vm: bool indicating writing to pages or not - * @gfp_mask: memory allocation flags - * - * Map the user space address into a bio suitable for io to a block - * device. Returns an error pointer in case of error. - */ -struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, - unsigned long uaddr, unsigned int len, int write_to_vm, - gfp_t gfp_mask) -{ - struct sg_iovec iov; - - iov.iov_base = (void __user *)uaddr; - iov.iov_len = len; - - return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask); -} -EXPORT_SYMBOL(bio_map_user); - -/** - * bio_map_user_iov - map user sg_iovec table into bio - * @q: the struct request_queue for the bio - * @bdev: destination block device - * @iov: the iovec. - * @iov_count: number of elements in the iovec - * @write_to_vm: bool indicating writing to pages or not - * @gfp_mask: memory allocation flags - * - * Map the user space address into a bio suitable for io to a block - * device. Returns an error pointer in case of error. - */ -struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, - const struct sg_iovec *iov, int iov_count, - int write_to_vm, gfp_t gfp_mask) -{ - struct bio *bio; - - bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm, - gfp_mask); - if (IS_ERR(bio)) - return bio; - - /* - * subtle -- if __bio_map_user() ended up bouncing a bio, - * it would normally disappear when its bi_end_io is run. - * however, we need it for the unmap, so grab an extra - * reference to it - */ - bio_get(bio); - - return bio; -} - -static void __bio_unmap_user(struct bio *bio) -{ - struct bio_vec *bvec; - int i; - - /* - * make sure we dirty pages we wrote to - */ - bio_for_each_segment_all(bvec, bio, i) { - if (bio_data_dir(bio) == READ) - set_page_dirty_lock(bvec->bv_page); - - page_cache_release(bvec->bv_page); - } - - bio_put(bio); -} - -/** - * bio_unmap_user - unmap a bio - * @bio: the bio being unmapped - * - * Unmap a bio previously mapped by bio_map_user(). Must be called with - * a process context. - * - * bio_unmap_user() may sleep. - */ -void bio_unmap_user(struct bio *bio) -{ - __bio_unmap_user(bio); - bio_put(bio); -} -EXPORT_SYMBOL(bio_unmap_user); - -static void bio_map_kern_endio(struct bio *bio, int err) -{ - bio_put(bio); -} - -static struct bio *__bio_map_kern(struct request_queue *q, void *data, - unsigned int len, gfp_t gfp_mask) -{ - unsigned long kaddr = (unsigned long)data; - unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = kaddr >> PAGE_SHIFT; - const int nr_pages = end - start; - int offset, i; - struct bio *bio; - - bio = bio_kmalloc(gfp_mask, nr_pages); - if (!bio) - return ERR_PTR(-ENOMEM); - - offset = offset_in_page(kaddr); - for (i = 0; i < nr_pages; i++) { - unsigned int bytes = PAGE_SIZE - offset; - - if (len <= 0) - break; - - if (bytes > len) - bytes = len; - - if (bio_add_pc_page(q, bio, virt_to_page(data), bytes, - offset) < bytes) - break; - - data += bytes; - len -= bytes; - offset = 0; - } - - bio->bi_end_io = bio_map_kern_endio; - return bio; -} - -/** - * bio_map_kern - map kernel address into bio - * @q: the struct request_queue for the bio - * @data: pointer to buffer to map - * @len: length in bytes - * @gfp_mask: allocation flags for bio allocation - * - * Map the kernel address into a bio suitable for io to a block - * device. Returns an error pointer in case of error. - */ -struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, - gfp_t gfp_mask) -{ - struct bio *bio; - - bio = __bio_map_kern(q, data, len, gfp_mask); - if (IS_ERR(bio)) - return bio; - - if (bio->bi_iter.bi_size == len) - return bio; - - /* - * Don't support partial mappings. - */ - bio_put(bio); - return ERR_PTR(-EINVAL); -} -EXPORT_SYMBOL(bio_map_kern); - -static void bio_copy_kern_endio(struct bio *bio, int err) -{ - struct bio_vec *bvec; - const int read = bio_data_dir(bio) == READ; - struct bio_map_data *bmd = bio->bi_private; - int i; - char *p = bmd->sgvecs[0].iov_base; - - bio_for_each_segment_all(bvec, bio, i) { - char *addr = page_address(bvec->bv_page); - - if (read) - memcpy(p, addr, bvec->bv_len); - - __free_page(bvec->bv_page); - p += bvec->bv_len; - } - - kfree(bmd); - bio_put(bio); -} - -/** - * bio_copy_kern - copy kernel address into bio - * @q: the struct request_queue for the bio - * @data: pointer to buffer to copy - * @len: length in bytes - * @gfp_mask: allocation flags for bio and page allocation - * @reading: data direction is READ - * - * copy the kernel address into a bio suitable for io to a block - * device. Returns an error pointer in case of error. - */ -struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, - gfp_t gfp_mask, int reading) -{ - struct bio *bio; - struct bio_vec *bvec; - int i; - - bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask); - if (IS_ERR(bio)) - return bio; - - if (!reading) { - void *p = data; - - bio_for_each_segment_all(bvec, bio, i) { - char *addr = page_address(bvec->bv_page); - - memcpy(addr, p, bvec->bv_len); - p += bvec->bv_len; - } - } - - bio->bi_end_io = bio_copy_kern_endio; - - return bio; -} -EXPORT_SYMBOL(bio_copy_kern); - -/* - * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions - * for performing direct-IO in BIOs. - * - * The problem is that we cannot run set_page_dirty() from interrupt context - * because the required locks are not interrupt-safe. So what we can do is to - * mark the pages dirty _before_ performing IO. And in interrupt context, - * check that the pages are still dirty. If so, fine. If not, redirty them - * in process context. - * - * We special-case compound pages here: normally this means reads into hugetlb - * pages. The logic in here doesn't really work right for compound pages - * because the VM does not uniformly chase down the head page in all cases. - * But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't - * handle them at all. So we skip compound pages here at an early stage. - * - * Note that this code is very hard to test under normal circumstances because - * direct-io pins the pages with get_user_pages(). This makes - * is_page_cache_freeable return false, and the VM will not clean the pages. - * But other code (eg, flusher threads) could clean the pages if they are mapped - * pagecache. - * - * Simply disabling the call to bio_set_pages_dirty() is a good way to test the - * deferred bio dirtying paths. - */ - -/* - * bio_set_pages_dirty() will mark all the bio's pages as dirty. - */ -void bio_set_pages_dirty(struct bio *bio) -{ - struct bio_vec *bvec; - int i; - - bio_for_each_segment_all(bvec, bio, i) { - struct page *page = bvec->bv_page; - - if (page && !PageCompound(page)) - set_page_dirty_lock(page); - } -} - -static void bio_release_pages(struct bio *bio) -{ - struct bio_vec *bvec; - int i; - - bio_for_each_segment_all(bvec, bio, i) { - struct page *page = bvec->bv_page; - - if (page) - put_page(page); - } -} - -/* - * bio_check_pages_dirty() will check that all the BIO's pages are still dirty. - * If they are, then fine. If, however, some pages are clean then they must - * have been written out during the direct-IO read. So we take another ref on - * the BIO and the offending pages and re-dirty the pages in process context. - * - * It is expected that bio_check_pages_dirty() will wholly own the BIO from - * here on. It will run one page_cache_release() against each page and will - * run one bio_put() against the BIO. - */ - -static void bio_dirty_fn(struct work_struct *work); - -static DECLARE_WORK(bio_dirty_work, bio_dirty_fn); -static DEFINE_SPINLOCK(bio_dirty_lock); -static struct bio *bio_dirty_list; - -/* - * This runs in process context - */ -static void bio_dirty_fn(struct work_struct *work) -{ - unsigned long flags; - struct bio *bio; - - spin_lock_irqsave(&bio_dirty_lock, flags); - bio = bio_dirty_list; - bio_dirty_list = NULL; - spin_unlock_irqrestore(&bio_dirty_lock, flags); - - while (bio) { - struct bio *next = bio->bi_private; - - bio_set_pages_dirty(bio); - bio_release_pages(bio); - bio_put(bio); - bio = next; - } -} - -void bio_check_pages_dirty(struct bio *bio) -{ - struct bio_vec *bvec; - int nr_clean_pages = 0; - int i; - - bio_for_each_segment_all(bvec, bio, i) { - struct page *page = bvec->bv_page; - - if (PageDirty(page) || PageCompound(page)) { - page_cache_release(page); - bvec->bv_page = NULL; - } else { - nr_clean_pages++; - } - } - - if (nr_clean_pages) { - unsigned long flags; - - spin_lock_irqsave(&bio_dirty_lock, flags); - bio->bi_private = bio_dirty_list; - bio_dirty_list = bio; - spin_unlock_irqrestore(&bio_dirty_lock, flags); - schedule_work(&bio_dirty_work); - } else { - bio_put(bio); - } -} - -#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE -void bio_flush_dcache_pages(struct bio *bi) -{ - struct bio_vec bvec; - struct bvec_iter iter; - - bio_for_each_segment(bvec, bi, iter) - flush_dcache_page(bvec.bv_page); -} -EXPORT_SYMBOL(bio_flush_dcache_pages); -#endif - -/** - * bio_endio - end I/O on a bio - * @bio: bio - * @error: error, if any - * - * Description: - * bio_endio() will end I/O on the whole bio. bio_endio() is the - * preferred way to end I/O on a bio, it takes care of clearing - * BIO_UPTODATE on error. @error is 0 on success, and and one of the - * established -Exxxx (-EIO, for instance) error values in case - * something went wrong. No one should call bi_end_io() directly on a - * bio unless they own it and thus know that it has an end_io - * function. - **/ -void bio_endio(struct bio *bio, int error) -{ - while (bio) { - BUG_ON(atomic_read(&bio->bi_remaining) <= 0); - - if (error) - clear_bit(BIO_UPTODATE, &bio->bi_flags); - else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - error = -EIO; - - if (!atomic_dec_and_test(&bio->bi_remaining)) - return; - - /* - * Need to have a real endio function for chained bios, - * otherwise various corner cases will break (like stacking - * block devices that save/restore bi_end_io) - however, we want - * to avoid unbounded recursion and blowing the stack. Tail call - * optimization would handle this, but compiling with frame - * pointers also disables gcc's sibling call optimization. - */ - if (bio->bi_end_io == bio_chain_endio) { - struct bio *parent = bio->bi_private; - bio_put(bio); - bio = parent; - } else { - if (bio->bi_end_io) - bio->bi_end_io(bio, error); - bio = NULL; - } - } -} -EXPORT_SYMBOL(bio_endio); - -/** - * bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining - * @bio: bio - * @error: error, if any - * - * For code that has saved and restored bi_end_io; thing hard before using this - * function, probably you should've cloned the entire bio. - **/ -void bio_endio_nodec(struct bio *bio, int error) -{ - atomic_inc(&bio->bi_remaining); - bio_endio(bio, error); -} -EXPORT_SYMBOL(bio_endio_nodec); - -/** - * bio_split - split a bio - * @bio: bio to split - * @sectors: number of sectors to split from the front of @bio - * @gfp: gfp mask - * @bs: bio set to allocate from - * - * Allocates and returns a new bio which represents @sectors from the start of - * @bio, and updates @bio to represent the remaining sectors. - * - * The newly allocated bio will point to @bio's bi_io_vec; it is the caller's - * responsibility to ensure that @bio is not freed before the split. - */ -struct bio *bio_split(struct bio *bio, int sectors, - gfp_t gfp, struct bio_set *bs) -{ - struct bio *split = NULL; - - BUG_ON(sectors <= 0); - BUG_ON(sectors >= bio_sectors(bio)); - - split = bio_clone_fast(bio, gfp, bs); - if (!split) - return NULL; - - split->bi_iter.bi_size = sectors << 9; - - if (bio_integrity(split)) - bio_integrity_trim(split, 0, sectors); - - bio_advance(bio, split->bi_iter.bi_size); - - return split; -} -EXPORT_SYMBOL(bio_split); - -/** - * bio_trim - trim a bio - * @bio: bio to trim - * @offset: number of sectors to trim from the front of @bio - * @size: size we want to trim @bio to, in sectors - */ -void bio_trim(struct bio *bio, int offset, int size) -{ - /* 'bio' is a cloned bio which we need to trim to match - * the given offset and size. - */ - - size <<= 9; - if (offset == 0 && size == bio->bi_iter.bi_size) - return; - - clear_bit(BIO_SEG_VALID, &bio->bi_flags); - - bio_advance(bio, offset << 9); - - bio->bi_iter.bi_size = size; -} -EXPORT_SYMBOL_GPL(bio_trim); - -/* - * create memory pools for biovec's in a bio_set. - * use the global biovec slabs created for general use. - */ -mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries) -{ - struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX; - - return mempool_create_slab_pool(pool_entries, bp->slab); -} - -void bioset_free(struct bio_set *bs) -{ - if (bs->rescue_workqueue) - destroy_workqueue(bs->rescue_workqueue); - - if (bs->bio_pool) - mempool_destroy(bs->bio_pool); - - if (bs->bvec_pool) - mempool_destroy(bs->bvec_pool); - - bioset_integrity_free(bs); - bio_put_slab(bs); - - kfree(bs); -} -EXPORT_SYMBOL(bioset_free); - -/** - * bioset_create - Create a bio_set - * @pool_size: Number of bio and bio_vecs to cache in the mempool - * @front_pad: Number of bytes to allocate in front of the returned bio - * - * Description: - * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller - * to ask for a number of bytes to be allocated in front of the bio. - * Front pad allocation is useful for embedding the bio inside - * another structure, to avoid allocating extra data to go with the bio. - * Note that the bio must be embedded at the END of that structure always, - * or things will break badly. - */ -struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) -{ - unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec); - struct bio_set *bs; - - bs = kzalloc(sizeof(*bs), GFP_KERNEL); - if (!bs) - return NULL; - - bs->front_pad = front_pad; - - spin_lock_init(&bs->rescue_lock); - bio_list_init(&bs->rescue_list); - INIT_WORK(&bs->rescue_work, bio_alloc_rescue); - - bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad); - if (!bs->bio_slab) { - kfree(bs); - return NULL; - } - - bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab); - if (!bs->bio_pool) - goto bad; - - bs->bvec_pool = biovec_create_pool(bs, pool_size); - if (!bs->bvec_pool) - goto bad; - - bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); - if (!bs->rescue_workqueue) - goto bad; - - return bs; -bad: - bioset_free(bs); - return NULL; -} -EXPORT_SYMBOL(bioset_create); - -#ifdef CONFIG_BLK_CGROUP -/** - * bio_associate_current - associate a bio with %current - * @bio: target bio - * - * Associate @bio with %current if it hasn't been associated yet. Block - * layer will treat @bio as if it were issued by %current no matter which - * task actually issues it. - * - * This function takes an extra reference of @task's io_context and blkcg - * which will be put when @bio is released. The caller must own @bio, - * ensure %current->io_context exists, and is responsible for synchronizing - * calls to this function. - */ -int bio_associate_current(struct bio *bio) -{ - struct io_context *ioc; - struct cgroup_subsys_state *css; - - if (bio->bi_ioc) - return -EBUSY; - - ioc = current->io_context; - if (!ioc) - return -ENOENT; - - /* acquire active ref on @ioc and associate */ - get_io_context_active(ioc); - bio->bi_ioc = ioc; - - /* associate blkcg if exists */ - rcu_read_lock(); - css = task_css(current, blkio_cgrp_id); - if (css && css_tryget(css)) - bio->bi_css = css; - rcu_read_unlock(); - - return 0; -} - -/** - * bio_disassociate_task - undo bio_associate_current() - * @bio: target bio - */ -void bio_disassociate_task(struct bio *bio) -{ - if (bio->bi_ioc) { - put_io_context(bio->bi_ioc); - bio->bi_ioc = NULL; - } - if (bio->bi_css) { - css_put(bio->bi_css); - bio->bi_css = NULL; - } -} - -#endif /* CONFIG_BLK_CGROUP */ - -static void __init biovec_init_slabs(void) -{ - int i; - - for (i = 0; i < BIOVEC_NR_POOLS; i++) { - int size; - struct biovec_slab *bvs = bvec_slabs + i; - - if (bvs->nr_vecs <= BIO_INLINE_VECS) { - bvs->slab = NULL; - continue; - } - - size = bvs->nr_vecs * sizeof(struct bio_vec); - bvs->slab = kmem_cache_create(bvs->name, size, 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); - } -} - -static int __init init_bio(void) -{ - bio_slab_max = 2; - bio_slab_nr = 0; - bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL); - if (!bio_slabs) - panic("bio: can't allocate bios\n"); - - bio_integrity_init(); - biovec_init_slabs(); - - fs_bio_set = bioset_create(BIO_POOL_SIZE, 0); - if (!fs_bio_set) - panic("bio: can't allocate bios\n"); - - if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE)) - panic("bio: can't create integrity pool\n"); - - return 0; -} -subsys_initcall(init_bio); diff --git a/fs/block_dev.c b/fs/block_dev.c index 552a8d13bc32..83fba15cc394 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -363,6 +363,69 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync) } EXPORT_SYMBOL(blkdev_fsync); +/** + * bdev_read_page() - Start reading a page from a block device + * @bdev: The device to read the page from + * @sector: The offset on the device to read the page to (need not be aligned) + * @page: The page to read + * + * On entry, the page should be locked. It will be unlocked when the page + * has been read. If the block driver implements rw_page synchronously, + * that will be true on exit from this function, but it need not be. + * + * Errors returned by this function are usually "soft", eg out of memory, or + * queue full; callers should try a different route to read this page rather + * than propagate an error back up the stack. + * + * Return: negative errno if an error occurs, 0 if submission was successful. + */ +int bdev_read_page(struct block_device *bdev, sector_t sector, + struct page *page) +{ + const struct block_device_operations *ops = bdev->bd_disk->fops; + if (!ops->rw_page) + return -EOPNOTSUPP; + return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ); +} +EXPORT_SYMBOL_GPL(bdev_read_page); + +/** + * bdev_write_page() - Start writing a page to a block device + * @bdev: The device to write the page to + * @sector: The offset on the device to write the page to (need not be aligned) + * @page: The page to write + * @wbc: The writeback_control for the write + * + * On entry, the page should be locked and not currently under writeback. + * On exit, if the write started successfully, the page will be unlocked and + * under writeback. If the write failed already (eg the driver failed to + * queue the page to the device), the page will still be locked. If the + * caller is a ->writepage implementation, it will need to unlock the page. + * + * Errors returned by this function are usually "soft", eg out of memory, or + * queue full; callers should try a different route to write this page rather + * than propagate an error back up the stack. + * + * Return: negative errno if an error occurs, 0 if submission was successful. + */ +int bdev_write_page(struct block_device *bdev, sector_t sector, + struct page *page, struct writeback_control *wbc) +{ + int result; + int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE; + const struct block_device_operations *ops = bdev->bd_disk->fops; + if (!ops->rw_page) + return -EOPNOTSUPP; + set_page_writeback(page); + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw); + if (result) + end_page_writeback(page); + else + unlock_page(page); + return result; +} +EXPORT_SYMBOL_GPL(bdev_write_page); + /* * pseudo-fs */ diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index c9a24444ec9a..2256e9cceec5 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -279,7 +279,7 @@ static inline void btrfs_inode_block_unlocked_dio(struct inode *inode) static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode) { - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3955e475ceec..4cd0ac983f91 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3458,7 +3458,7 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb, static void end_extent_buffer_writeback(struct extent_buffer *eb) { clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); } @@ -4510,7 +4510,8 @@ static void check_buffer_tree_ref(struct extent_buffer *eb) spin_unlock(&eb->refs_lock); } -static void mark_extent_buffer_accessed(struct extent_buffer *eb) +static void mark_extent_buffer_accessed(struct extent_buffer *eb, + struct page *accessed) { unsigned long num_pages, i; @@ -4519,7 +4520,8 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb) num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < num_pages; i++) { struct page *p = extent_buffer_page(eb, i); - mark_page_accessed(p); + if (p != accessed) + mark_page_accessed(p); } } @@ -4533,7 +4535,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, start >> PAGE_CACHE_SHIFT); if (eb && atomic_inc_not_zero(&eb->refs)) { rcu_read_unlock(); - mark_extent_buffer_accessed(eb); + mark_extent_buffer_accessed(eb, NULL); return eb; } rcu_read_unlock(); @@ -4581,7 +4583,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, spin_unlock(&mapping->private_lock); unlock_page(p); page_cache_release(p); - mark_extent_buffer_accessed(exists); + mark_extent_buffer_accessed(exists, p); goto free_eb; } @@ -4596,7 +4598,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, attach_extent_buffer_page(eb, p); spin_unlock(&mapping->private_lock); WARN_ON(PageDirty(p)); - mark_page_accessed(p); eb->pages[i] = p; if (!PageUptodate(p)) uptodate = 0; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ae6af072b635..74272a3f9d9b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -470,11 +470,12 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) for (i = 0; i < num_pages; i++) { /* page checked is some magic around finding pages that * have been modified without going through btrfs_set_page_dirty - * clear it here + * clear it here. There should be no need to mark the pages + * accessed as prepare_pages should have marked them accessed + * in prepare_pages via find_or_create_page() */ ClearPageChecked(pages[i]); unlock_page(pages[i]); - mark_page_accessed(pages[i]); page_cache_release(pages[i]); } } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5f805bc944fa..5a3b8371772e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7126,7 +7126,7 @@ static void btrfs_end_dio_bio(struct bio *bio, int err) * before atomic variable goto zero, we must make sure * dip->errors is perceived to be set. */ - smp_mb__before_atomic_dec(); + smp_mb__before_atomic(); } /* if there are more bios still pending for this dio, just exit */ @@ -7306,7 +7306,7 @@ out_err: * before atomic variable goto zero, we must * make sure dip->errors is perceived to be set. */ - smp_mb__before_atomic_dec(); + smp_mb__before_atomic(); if (atomic_dec_and_test(&dip->pending_bios)) bio_io_error(dip->orig_bio); @@ -7449,7 +7449,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, return 0; atomic_inc(&inode->i_dio_count); - smp_mb__after_atomic_inc(); + smp_mb__after_atomic(); /* * The generic stuff only does filemap_write_and_wait_range, which diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2ad7de94efef..3f52bb7a58d2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -642,7 +642,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, return -EINVAL; atomic_inc(&root->will_be_snapshoted); - smp_mb__after_atomic_inc(); + smp_mb__after_atomic(); btrfs_wait_nocow_write(root); ret = btrfs_start_delalloc_inodes(root, 0); @@ -3120,6 +3120,8 @@ process_slot: } else if (type == BTRFS_FILE_EXTENT_INLINE) { u64 skip = 0; u64 trim = 0; + u64 aligned_end = 0; + if (off > key.offset) { skip = off - key.offset; new_key.offset += skip; @@ -3136,9 +3138,11 @@ process_slot: size -= skip + trim; datal -= skip + trim; + aligned_end = ALIGN(new_key.offset + datal, + root->sectorsize); ret = btrfs_drop_extents(trans, root, inode, new_key.offset, - new_key.offset + datal, + aligned_end, 1); if (ret) { if (ret != -EOPNOTSUPP) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index eb6537a08c1b..484aacac2c89 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -360,10 +360,13 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) /* * First time the inline_buf does not suffice */ - if (p->buf == p->inline_buf) + if (p->buf == p->inline_buf) { tmp_buf = kmalloc(len, GFP_NOFS); - else + if (tmp_buf) + memcpy(tmp_buf, p->buf, old_buf_len); + } else { tmp_buf = krealloc(p->buf, len, GFP_NOFS); + } if (!tmp_buf) return -ENOMEM; p->buf = tmp_buf; @@ -1668,7 +1671,7 @@ static int get_first_ref(struct btrfs_root *root, u64 ino, goto out; } - if (key.type == BTRFS_INODE_REF_KEY) { + if (found_key.type == BTRFS_INODE_REF_KEY) { struct btrfs_inode_ref *iref; iref = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_ref); diff --git a/fs/buffer.c b/fs/buffer.c index 9ddb9fc7d923..eba6e4f621ce 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -77,7 +77,7 @@ EXPORT_SYMBOL(__lock_buffer); void unlock_buffer(struct buffer_head *bh) { clear_bit_unlock(BH_Lock, &bh->b_state); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&bh->b_state, BH_Lock); } EXPORT_SYMBOL(unlock_buffer); @@ -227,7 +227,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) int all_mapped = 1; index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits); - page = find_get_page(bd_mapping, index); + page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED); if (!page) goto out; @@ -1366,12 +1366,13 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size) struct buffer_head *bh = lookup_bh_lru(bdev, block, size); if (bh == NULL) { + /* __find_get_block_slow will mark the page accessed */ bh = __find_get_block_slow(bdev, block); if (bh) bh_lru_install(bh); - } - if (bh) + } else touch_buffer(bh); + return bh; } EXPORT_SYMBOL(__find_get_block); @@ -1483,16 +1484,27 @@ EXPORT_SYMBOL(set_bh_page); /* * Called when truncating a buffer on a page completely. */ + +/* Bits that are cleared during an invalidate */ +#define BUFFER_FLAGS_DISCARD \ + (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \ + 1 << BH_Delay | 1 << BH_Unwritten) + static void discard_buffer(struct buffer_head * bh) { + unsigned long b_state, b_state_old; + lock_buffer(bh); clear_buffer_dirty(bh); bh->b_bdev = NULL; - clear_buffer_mapped(bh); - clear_buffer_req(bh); - clear_buffer_new(bh); - clear_buffer_delay(bh); - clear_buffer_unwritten(bh); + b_state = bh->b_state; + for (;;) { + b_state_old = cmpxchg(&bh->b_state, b_state, + (b_state & ~BUFFER_FLAGS_DISCARD)); + if (b_state_old == b_state) + break; + b_state = b_state_old; + } unlock_buffer(bh); } @@ -2879,10 +2891,9 @@ EXPORT_SYMBOL(block_truncate_page); /* * The generic ->writepage function for buffer-backed address_spaces - * this form passes in the end_io handler used to finish the IO. */ -int block_write_full_page_endio(struct page *page, get_block_t *get_block, - struct writeback_control *wbc, bh_end_io_t *handler) +int block_write_full_page(struct page *page, get_block_t *get_block, + struct writeback_control *wbc) { struct inode * const inode = page->mapping->host; loff_t i_size = i_size_read(inode); @@ -2892,7 +2903,7 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block, /* Is the page fully inside i_size? */ if (page->index < end_index) return __block_write_full_page(inode, page, get_block, wbc, - handler); + end_buffer_async_write); /* Is the page fully outside i_size? (truncate in progress) */ offset = i_size & (PAGE_CACHE_SIZE-1); @@ -2915,18 +2926,8 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block, * writes to that region are not written out to the file." */ zero_user_segment(page, offset, PAGE_CACHE_SIZE); - return __block_write_full_page(inode, page, get_block, wbc, handler); -} -EXPORT_SYMBOL(block_write_full_page_endio); - -/* - * The generic ->writepage function for buffer-backed address_spaces - */ -int block_write_full_page(struct page *page, get_block_t *get_block, - struct writeback_control *wbc) -{ - return block_write_full_page_endio(page, get_block, wbc, - end_buffer_async_write); + return __block_write_full_page(inode, page, get_block, wbc, + end_buffer_async_write); } EXPORT_SYMBOL(block_write_full_page); diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c index 5b99bafc31d1..d749731dc0ee 100644 --- a/fs/cachefiles/bind.c +++ b/fs/cachefiles/bind.c @@ -50,18 +50,18 @@ int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args) cache->brun_percent < 100); if (*args) { - kerror("'bind' command doesn't take an argument"); + pr_err("'bind' command doesn't take an argument"); return -EINVAL; } if (!cache->rootdirname) { - kerror("No cache directory specified"); + pr_err("No cache directory specified"); return -EINVAL; } /* don't permit already bound caches to be re-bound */ if (test_bit(CACHEFILES_READY, &cache->flags)) { - kerror("Cache already bound"); + pr_err("Cache already bound"); return -EBUSY; } @@ -228,9 +228,7 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) set_bit(CACHEFILES_READY, &cache->flags); dput(root); - printk(KERN_INFO "CacheFiles:" - " File cache on %s registered\n", - cache->cache.identifier); + pr_info("File cache on %s registered\n", cache->cache.identifier); /* check how much space the cache has */ cachefiles_has_space(cache, 0, 0); @@ -250,7 +248,7 @@ error_open_root: kmem_cache_free(cachefiles_object_jar, fsdef); error_root_object: cachefiles_end_secure(cache, saved_cred); - kerror("Failed to register: %d", ret); + pr_err("Failed to register: %d", ret); return ret; } @@ -262,9 +260,8 @@ void cachefiles_daemon_unbind(struct cachefiles_cache *cache) _enter(""); if (test_bit(CACHEFILES_READY, &cache->flags)) { - printk(KERN_INFO "CacheFiles:" - " File cache on %s unregistering\n", - cache->cache.identifier); + pr_info("File cache on %s unregistering\n", + cache->cache.identifier); fscache_withdraw_cache(&cache->cache); } diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 0a1467b15516..b078d3081d6c 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -315,8 +315,7 @@ static unsigned int cachefiles_daemon_poll(struct file *file, static int cachefiles_daemon_range_error(struct cachefiles_cache *cache, char *args) { - kerror("Free space limits must be in range" - " 0%%<=stop<cull<run<100%%"); + pr_err("Free space limits must be in range 0%%<=stop<cull<run<100%%"); return -EINVAL; } @@ -476,12 +475,12 @@ static int cachefiles_daemon_dir(struct cachefiles_cache *cache, char *args) _enter(",%s", args); if (!*args) { - kerror("Empty directory specified"); + pr_err("Empty directory specified"); return -EINVAL; } if (cache->rootdirname) { - kerror("Second cache directory specified"); + pr_err("Second cache directory specified"); return -EEXIST; } @@ -504,12 +503,12 @@ static int cachefiles_daemon_secctx(struct cachefiles_cache *cache, char *args) _enter(",%s", args); if (!*args) { - kerror("Empty security context specified"); + pr_err("Empty security context specified"); return -EINVAL; } if (cache->secctx) { - kerror("Second security context specified"); + pr_err("Second security context specified"); return -EINVAL; } @@ -532,7 +531,7 @@ static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args) _enter(",%s", args); if (!*args) { - kerror("Empty tag specified"); + pr_err("Empty tag specified"); return -EINVAL; } @@ -563,12 +562,12 @@ static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) goto inval; if (!test_bit(CACHEFILES_READY, &cache->flags)) { - kerror("cull applied to unready cache"); + pr_err("cull applied to unready cache"); return -EIO; } if (test_bit(CACHEFILES_DEAD, &cache->flags)) { - kerror("cull applied to dead cache"); + pr_err("cull applied to dead cache"); return -EIO; } @@ -588,11 +587,11 @@ static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) notdir: path_put(&path); - kerror("cull command requires dirfd to be a directory"); + pr_err("cull command requires dirfd to be a directory"); return -ENOTDIR; inval: - kerror("cull command requires dirfd and filename"); + pr_err("cull command requires dirfd and filename"); return -EINVAL; } @@ -615,7 +614,7 @@ static int cachefiles_daemon_debug(struct cachefiles_cache *cache, char *args) return 0; inval: - kerror("debug command requires mask"); + pr_err("debug command requires mask"); return -EINVAL; } @@ -635,12 +634,12 @@ static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) goto inval; if (!test_bit(CACHEFILES_READY, &cache->flags)) { - kerror("inuse applied to unready cache"); + pr_err("inuse applied to unready cache"); return -EIO; } if (test_bit(CACHEFILES_DEAD, &cache->flags)) { - kerror("inuse applied to dead cache"); + pr_err("inuse applied to dead cache"); return -EIO; } @@ -660,11 +659,11 @@ static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) notdir: path_put(&path); - kerror("inuse command requires dirfd to be a directory"); + pr_err("inuse command requires dirfd to be a directory"); return -ENOTDIR; inval: - kerror("inuse command requires dirfd and filename"); + pr_err("inuse command requires dirfd and filename"); return -EINVAL; } diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 57e17fe6121a..584743d456c3 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -146,8 +146,7 @@ static int cachefiles_lookup_object(struct fscache_object *_object) if (ret < 0 && ret != -ETIMEDOUT) { if (ret != -ENOBUFS) - printk(KERN_WARNING - "CacheFiles: Lookup failed error %d\n", ret); + pr_warn("Lookup failed error %d\n", ret); fscache_object_lookup_error(&object->fscache); } diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 5349473df1b1..3d50998abf57 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -9,6 +9,13 @@ * 2 of the Licence, or (at your option) any later version. */ +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) "CacheFiles: " fmt + + #include <linux/fscache-cache.h> #include <linux/timer.h> #include <linux/wait.h> @@ -245,11 +252,10 @@ extern int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, /* * error handling */ -#define kerror(FMT, ...) printk(KERN_ERR "CacheFiles: "FMT"\n", ##__VA_ARGS__) #define cachefiles_io_error(___cache, FMT, ...) \ do { \ - kerror("I/O Error: " FMT, ##__VA_ARGS__); \ + pr_err("I/O Error: " FMT, ##__VA_ARGS__); \ fscache_io_error(&(___cache)->cache); \ set_bit(CACHEFILES_DEAD, &(___cache)->flags); \ } while (0) @@ -310,8 +316,8 @@ do { \ #define ASSERT(X) \ do { \ if (unlikely(!(X))) { \ - printk(KERN_ERR "\n"); \ - printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ BUG(); \ } \ } while (0) @@ -319,9 +325,9 @@ do { \ #define ASSERTCMP(X, OP, Y) \ do { \ if (unlikely(!((X) OP (Y)))) { \ - printk(KERN_ERR "\n"); \ - printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ - printk(KERN_ERR "%lx " #OP " %lx is false\n", \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ + pr_err("%lx " #OP " %lx is false\n", \ (unsigned long)(X), (unsigned long)(Y)); \ BUG(); \ } \ @@ -330,8 +336,8 @@ do { \ #define ASSERTIF(C, X) \ do { \ if (unlikely((C) && !(X))) { \ - printk(KERN_ERR "\n"); \ - printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ BUG(); \ } \ } while (0) @@ -339,9 +345,9 @@ do { \ #define ASSERTIFCMP(C, X, OP, Y) \ do { \ if (unlikely((C) && !((X) OP (Y)))) { \ - printk(KERN_ERR "\n"); \ - printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ - printk(KERN_ERR "%lx " #OP " %lx is false\n", \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ + pr_err("%lx " #OP " %lx is false\n", \ (unsigned long)(X), (unsigned long)(Y)); \ BUG(); \ } \ diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c index 4bfa8cf43bf5..180edfb45f66 100644 --- a/fs/cachefiles/main.c +++ b/fs/cachefiles/main.c @@ -68,8 +68,7 @@ static int __init cachefiles_init(void) SLAB_HWCACHE_ALIGN, cachefiles_object_init_once); if (!cachefiles_object_jar) { - printk(KERN_NOTICE - "CacheFiles: Failed to allocate an object jar\n"); + pr_notice("Failed to allocate an object jar\n"); goto error_object_jar; } @@ -77,7 +76,7 @@ static int __init cachefiles_init(void) if (ret < 0) goto error_proc; - printk(KERN_INFO "CacheFiles: Loaded\n"); + pr_info("Loaded\n"); return 0; error_proc: @@ -85,7 +84,7 @@ error_proc: error_object_jar: misc_deregister(&cachefiles_dev); error_dev: - kerror("failed to register: %d", ret); + pr_err("failed to register: %d", ret); return ret; } @@ -96,7 +95,7 @@ fs_initcall(cachefiles_init); */ static void __exit cachefiles_exit(void) { - printk(KERN_INFO "CacheFiles: Unloading\n"); + pr_info("Unloading\n"); cachefiles_proc_cleanup(); kmem_cache_destroy(cachefiles_object_jar); diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index c0a681705104..5bf2b41e66d3 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -35,22 +35,21 @@ void __cachefiles_printk_object(struct cachefiles_object *object, struct fscache_cookie *cookie; unsigned keylen, loop; - printk(KERN_ERR "%sobject: OBJ%x\n", - prefix, object->fscache.debug_id); - printk(KERN_ERR "%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n", + pr_err("%sobject: OBJ%x\n", prefix, object->fscache.debug_id); + pr_err("%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n", prefix, object->fscache.state->name, object->fscache.flags, work_busy(&object->fscache.work), object->fscache.events, object->fscache.event_mask); - printk(KERN_ERR "%sops=%u inp=%u exc=%u\n", + pr_err("%sops=%u inp=%u exc=%u\n", prefix, object->fscache.n_ops, object->fscache.n_in_progress, object->fscache.n_exclusive); - printk(KERN_ERR "%sparent=%p\n", + pr_err("%sparent=%p\n", prefix, object->fscache.parent); spin_lock(&object->fscache.lock); cookie = object->fscache.cookie; if (cookie) { - printk(KERN_ERR "%scookie=%p [pr=%p nd=%p fl=%lx]\n", + pr_err("%scookie=%p [pr=%p nd=%p fl=%lx]\n", prefix, object->fscache.cookie, object->fscache.cookie->parent, @@ -62,16 +61,16 @@ void __cachefiles_printk_object(struct cachefiles_object *object, else keylen = 0; } else { - printk(KERN_ERR "%scookie=NULL\n", prefix); + pr_err("%scookie=NULL\n", prefix); keylen = 0; } spin_unlock(&object->fscache.lock); if (keylen) { - printk(KERN_ERR "%skey=[%u] '", prefix, keylen); + pr_err("%skey=[%u] '", prefix, keylen); for (loop = 0; loop < keylen; loop++) - printk("%02x", keybuf[loop]); - printk("'\n"); + pr_cont("%02x", keybuf[loop]); + pr_cont("'\n"); } } @@ -131,13 +130,11 @@ found_dentry: dentry); if (fscache_object_is_live(&object->fscache)) { - printk(KERN_ERR "\n"); - printk(KERN_ERR "CacheFiles: Error:" - " Can't preemptively bury live object\n"); + pr_err("\n"); + pr_err("Error: Can't preemptively bury live object\n"); cachefiles_printk_object(object, NULL); } else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) { - printk(KERN_ERR "CacheFiles: Error:" - " Object already preemptively buried\n"); + pr_err("Error: Object already preemptively buried\n"); } write_unlock(&cache->active_lock); @@ -160,7 +157,7 @@ try_again: write_lock(&cache->active_lock); if (test_and_set_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) { - printk(KERN_ERR "CacheFiles: Error: Object already active\n"); + pr_err("Error: Object already active\n"); cachefiles_printk_object(object, NULL); BUG(); } @@ -193,9 +190,8 @@ try_again: * need to wait for it to be destroyed */ wait_for_old_object: if (fscache_object_is_live(&object->fscache)) { - printk(KERN_ERR "\n"); - printk(KERN_ERR "CacheFiles: Error:" - " Unexpected object collision\n"); + pr_err("\n"); + pr_err("Error: Unexpected object collision\n"); cachefiles_printk_object(object, xobject); BUG(); } @@ -241,9 +237,8 @@ wait_for_old_object: } if (timeout <= 0) { - printk(KERN_ERR "\n"); - printk(KERN_ERR "CacheFiles: Error: Overlong" - " wait for old active object to go away\n"); + pr_err("\n"); + pr_err("Error: Overlong wait for old active object to go away\n"); cachefiles_printk_object(object, xobject); goto requeue; } @@ -548,7 +543,7 @@ lookup_again: next, next->d_inode, next->d_inode->i_ino); } else if (!S_ISDIR(next->d_inode->i_mode)) { - kerror("inode %lu is not a directory", + pr_err("inode %lu is not a directory", next->d_inode->i_ino); ret = -ENOBUFS; goto error; @@ -579,7 +574,7 @@ lookup_again: } else if (!S_ISDIR(next->d_inode->i_mode) && !S_ISREG(next->d_inode->i_mode) ) { - kerror("inode %lu is not a file or directory", + pr_err("inode %lu is not a file or directory", next->d_inode->i_ino); ret = -ENOBUFS; goto error; @@ -773,7 +768,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, ASSERT(subdir->d_inode); if (!S_ISDIR(subdir->d_inode->i_mode)) { - kerror("%s is not a directory", dirname); + pr_err("%s is not a directory", dirname); ret = -EIO; goto check_error; } @@ -800,13 +795,13 @@ check_error: mkdir_error: mutex_unlock(&dir->d_inode->i_mutex); dput(subdir); - kerror("mkdir %s failed with error %d", dirname, ret); + pr_err("mkdir %s failed with error %d", dirname, ret); return ERR_PTR(ret); lookup_error: mutex_unlock(&dir->d_inode->i_mutex); ret = PTR_ERR(subdir); - kerror("Lookup %s failed with error %d", dirname, ret); + pr_err("Lookup %s failed with error %d", dirname, ret); return ERR_PTR(ret); nomem_d_alloc: @@ -896,7 +891,7 @@ lookup_error: if (ret == -EIO) { cachefiles_io_error(cache, "Lookup failed"); } else if (ret != -ENOMEM) { - kerror("Internal error: %d", ret); + pr_err("Internal error: %d", ret); ret = -EIO; } @@ -955,7 +950,7 @@ error: } if (ret != -ENOMEM) { - kerror("Internal error: %d", ret); + pr_err("Internal error: %d", ret); ret = -EIO; } diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c index 039b5011d83b..396c18ea2764 100644 --- a/fs/cachefiles/security.c +++ b/fs/cachefiles/security.c @@ -34,9 +34,7 @@ int cachefiles_get_security_ID(struct cachefiles_cache *cache) ret = set_security_override_from_ctx(new, cache->secctx); if (ret < 0) { put_cred(new); - printk(KERN_ERR "CacheFiles:" - " Security denies permission to nominate" - " security context: error %d\n", + pr_err("Security denies permission to nominate security context: error %d\n", ret); goto error; } @@ -59,16 +57,14 @@ static int cachefiles_check_cache_dir(struct cachefiles_cache *cache, ret = security_inode_mkdir(root->d_inode, root, 0); if (ret < 0) { - printk(KERN_ERR "CacheFiles:" - " Security denies permission to make dirs: error %d", + pr_err("Security denies permission to make dirs: error %d", ret); return ret; } ret = security_inode_create(root->d_inode, root, 0); if (ret < 0) - printk(KERN_ERR "CacheFiles:" - " Security denies permission to create files: error %d", + pr_err("Security denies permission to create files: error %d", ret); return ret; diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index 12b0eef84183..1ad51ffbb275 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -51,7 +51,7 @@ int cachefiles_check_object_type(struct cachefiles_object *object) } if (ret != -EEXIST) { - kerror("Can't set xattr on %*.*s [%lu] (err %d)", + pr_err("Can't set xattr on %*.*s [%lu] (err %d)", dentry->d_name.len, dentry->d_name.len, dentry->d_name.name, dentry->d_inode->i_ino, -ret); @@ -64,7 +64,7 @@ int cachefiles_check_object_type(struct cachefiles_object *object) if (ret == -ERANGE) goto bad_type_length; - kerror("Can't read xattr on %*.*s [%lu] (err %d)", + pr_err("Can't read xattr on %*.*s [%lu] (err %d)", dentry->d_name.len, dentry->d_name.len, dentry->d_name.name, dentry->d_inode->i_ino, -ret); @@ -85,14 +85,14 @@ error: return ret; bad_type_length: - kerror("Cache object %lu type xattr length incorrect", + pr_err("Cache object %lu type xattr length incorrect", dentry->d_inode->i_ino); ret = -EIO; goto error; bad_type: xtype[2] = 0; - kerror("Cache object %*.*s [%lu] type %s not %s", + pr_err("Cache object %*.*s [%lu] type %s not %s", dentry->d_name.len, dentry->d_name.len, dentry->d_name.name, dentry->d_inode->i_ino, xtype, type); @@ -293,7 +293,7 @@ error: return ret; bad_type_length: - kerror("Cache object %lu xattr length incorrect", + pr_err("Cache object %lu xattr length incorrect", dentry->d_inode->i_ino); ret = -EIO; goto error; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index b53278c9fd97..65a30e817dd8 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -694,7 +694,7 @@ static int ceph_writepages_start(struct address_space *mapping, (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { - pr_warning("writepage_start %p on forced umount\n", inode); + pr_warn("writepage_start %p on forced umount\n", inode); return -EIO; /* we're in a forced umount, don't write! */ } if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize) diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 16b54aa31f08..5a743ac141ab 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -71,9 +71,9 @@ static int mdsc_show(struct seq_file *s, void *p) seq_printf(s, "%s", ceph_mds_op_name(req->r_op)); if (req->r_got_unsafe) - seq_printf(s, "\t(unsafe)"); + seq_puts(s, "\t(unsafe)"); else - seq_printf(s, "\t"); + seq_puts(s, "\t"); if (req->r_inode) { seq_printf(s, " #%llx", ceph_ino(req->r_inode)); @@ -119,7 +119,7 @@ static int mdsc_show(struct seq_file *s, void *p) seq_printf(s, " %s", req->r_path2); } - seq_printf(s, "\n"); + seq_puts(s, "\n"); } mutex_unlock(&mdsc->mutex); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 233c6f96910a..e4fff9ff1c27 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -821,7 +821,7 @@ no_change: spin_unlock(&ci->i_ceph_lock); } } else if (cap_fmode >= 0) { - pr_warning("mds issued no caps on %llx.%llx\n", + pr_warn("mds issued no caps on %llx.%llx\n", ceph_vinop(inode)); __ceph_get_fmode(ci, cap_fmode); } diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 191398852a2e..fbc39c47bacd 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -53,10 +53,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, else length = fl->fl_end - fl->fl_start + 1; - if (lock_type == CEPH_LOCK_FCNTL) - owner = secure_addr(fl->fl_owner); - else - owner = secure_addr(fl->fl_file); + owner = secure_addr(fl->fl_owner); dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, " "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type, @@ -314,10 +311,7 @@ int lock_to_ceph_filelock(struct file_lock *lock, cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); cephlock->client = cpu_to_le64(0); cephlock->pid = cpu_to_le64((u64)lock->fl_pid); - if (lock->fl_flags & FL_POSIX) - cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner)); - else - cephlock->owner = cpu_to_le64(secure_addr(lock->fl_file)); + cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner)); switch (lock->fl_type) { case F_RDLCK: diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 2b4d093d0563..9a33b98cb000 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2218,13 +2218,13 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) /* dup? */ if ((req->r_got_unsafe && !head->safe) || (req->r_got_safe && head->safe)) { - pr_warning("got a dup %s reply on %llu from mds%d\n", + pr_warn("got a dup %s reply on %llu from mds%d\n", head->safe ? "safe" : "unsafe", tid, mds); mutex_unlock(&mdsc->mutex); goto out; } if (req->r_got_safe && !head->safe) { - pr_warning("got unsafe after safe on %llu from mds%d\n", + pr_warn("got unsafe after safe on %llu from mds%d\n", tid, mds); mutex_unlock(&mdsc->mutex); goto out; @@ -3525,7 +3525,7 @@ static void peer_reset(struct ceph_connection *con) struct ceph_mds_session *s = con->private; struct ceph_mds_client *mdsc = s->s_mdsc; - pr_warning("mds%d closed our session\n", s->s_mds); + pr_warn("mds%d closed our session\n", s->s_mds); send_mds_reconnect(mdsc, s); } diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 132b64eeecd4..261531e55e9d 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -62,7 +62,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ceph_decode_16_safe(p, end, version, bad); if (version > 3) { - pr_warning("got mdsmap version %d > 3, failing", version); + pr_warn("got mdsmap version %d > 3, failing", version); goto bad; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 5be1f997ecde..6aaa8112c538 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -87,10 +87,6 @@ extern mempool_t *cifs_mid_poolp; struct workqueue_struct *cifsiod_wq; -#ifdef CONFIG_CIFS_SMB2 -__u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE]; -#endif - /* * Bumps refcount for cifs super block. * Note that it should be only called if a referece to VFS super block is @@ -251,11 +247,7 @@ cifs_alloc_inode(struct super_block *sb) * server, can not assume caching of file data or metadata. */ cifs_set_oplock_level(cifs_inode, 0); - cifs_inode->delete_pending = false; - cifs_inode->invalid_mapping = false; - clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cifs_inode->flags); - clear_bit(CIFS_INODE_PENDING_WRITERS, &cifs_inode->flags); - clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cifs_inode->flags); + cifs_inode->flags = 0; spin_lock_init(&cifs_inode->writers_lock); cifs_inode->writers = 0; cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ @@ -302,7 +294,7 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) struct sockaddr_in *sa = (struct sockaddr_in *) &server->dstaddr; struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) &server->dstaddr; - seq_printf(s, ",addr="); + seq_puts(s, ",addr="); switch (server->dstaddr.ss_family) { case AF_INET: @@ -314,7 +306,7 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) seq_printf(s, "%%%u", sa6->sin6_scope_id); break; default: - seq_printf(s, "(unknown)"); + seq_puts(s, "(unknown)"); } } @@ -324,45 +316,45 @@ cifs_show_security(struct seq_file *s, struct cifs_ses *ses) if (ses->sectype == Unspecified) return; - seq_printf(s, ",sec="); + seq_puts(s, ",sec="); switch (ses->sectype) { case LANMAN: - seq_printf(s, "lanman"); + seq_puts(s, "lanman"); break; case NTLMv2: - seq_printf(s, "ntlmv2"); + seq_puts(s, "ntlmv2"); break; case NTLM: - seq_printf(s, "ntlm"); + seq_puts(s, "ntlm"); break; case Kerberos: - seq_printf(s, "krb5"); + seq_puts(s, "krb5"); break; case RawNTLMSSP: - seq_printf(s, "ntlmssp"); + seq_puts(s, "ntlmssp"); break; default: /* shouldn't ever happen */ - seq_printf(s, "unknown"); + seq_puts(s, "unknown"); break; } if (ses->sign) - seq_printf(s, "i"); + seq_puts(s, "i"); } static void cifs_show_cache_flavor(struct seq_file *s, struct cifs_sb_info *cifs_sb) { - seq_printf(s, ",cache="); + seq_puts(s, ",cache="); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) - seq_printf(s, "strict"); + seq_puts(s, "strict"); else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) - seq_printf(s, "none"); + seq_puts(s, "none"); else - seq_printf(s, "loose"); + seq_puts(s, "loose"); } static void @@ -395,7 +387,7 @@ cifs_show_options(struct seq_file *s, struct dentry *root) cifs_show_cache_flavor(s, cifs_sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) - seq_printf(s, ",multiuser"); + seq_puts(s, ",multiuser"); else if (tcon->ses->user_name) seq_printf(s, ",username=%s", tcon->ses->user_name); @@ -421,16 +413,16 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_printf(s, ",uid=%u", from_kuid_munged(&init_user_ns, cifs_sb->mnt_uid)); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) - seq_printf(s, ",forceuid"); + seq_puts(s, ",forceuid"); else - seq_printf(s, ",noforceuid"); + seq_puts(s, ",noforceuid"); seq_printf(s, ",gid=%u", from_kgid_munged(&init_user_ns, cifs_sb->mnt_gid)); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) - seq_printf(s, ",forcegid"); + seq_puts(s, ",forcegid"); else - seq_printf(s, ",noforcegid"); + seq_puts(s, ",noforcegid"); cifs_show_address(s, tcon->ses->server); @@ -442,47 +434,47 @@ cifs_show_options(struct seq_file *s, struct dentry *root) cifs_show_nls(s, cifs_sb->local_nls); if (tcon->seal) - seq_printf(s, ",seal"); + seq_puts(s, ",seal"); if (tcon->nocase) - seq_printf(s, ",nocase"); + seq_puts(s, ",nocase"); if (tcon->retry) - seq_printf(s, ",hard"); + seq_puts(s, ",hard"); if (tcon->unix_ext) - seq_printf(s, ",unix"); + seq_puts(s, ",unix"); else - seq_printf(s, ",nounix"); + seq_puts(s, ",nounix"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) - seq_printf(s, ",posixpaths"); + seq_puts(s, ",posixpaths"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) - seq_printf(s, ",setuids"); + seq_puts(s, ",setuids"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) - seq_printf(s, ",serverino"); + seq_puts(s, ",serverino"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) - seq_printf(s, ",rwpidforward"); + seq_puts(s, ",rwpidforward"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) - seq_printf(s, ",forcemand"); + seq_puts(s, ",forcemand"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) - seq_printf(s, ",nouser_xattr"); + seq_puts(s, ",nouser_xattr"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) - seq_printf(s, ",mapchars"); + seq_puts(s, ",mapchars"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) - seq_printf(s, ",sfu"); + seq_puts(s, ",sfu"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) - seq_printf(s, ",nobrl"); + seq_puts(s, ",nobrl"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) - seq_printf(s, ",cifsacl"); + seq_puts(s, ",cifsacl"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) - seq_printf(s, ",dynperm"); + seq_puts(s, ",dynperm"); if (root->d_sb->s_flags & MS_POSIXACL) - seq_printf(s, ",acl"); + seq_puts(s, ",acl"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) - seq_printf(s, ",mfsymlinks"); + seq_puts(s, ",mfsymlinks"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) - seq_printf(s, ",fsc"); + seq_puts(s, ",fsc"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC) - seq_printf(s, ",nostrictsync"); + seq_puts(s, ",nostrictsync"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) - seq_printf(s, ",noperm"); + seq_puts(s, ",noperm"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) seq_printf(s, ",backupuid=%u", from_kuid_munged(&init_user_ns, @@ -1192,10 +1184,6 @@ init_cifs(void) spin_lock_init(&cifs_file_list_lock); spin_lock_init(&GlobalMid_Lock); -#ifdef CONFIG_CIFS_SMB2 - get_random_bytes(cifs_client_guid, SMB2_CLIENT_GUID_SIZE); -#endif - if (cifs_max_pending < 2) { cifs_max_pending = 2; cifs_dbg(FYI, "cifs_max_pending set to min of 2\n"); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 26a754f49ba1..8fe51166d6e3 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -22,20 +22,28 @@ #ifndef _CIFSFS_H #define _CIFSFS_H +#include <linux/hash.h> + #define ROOT_I 2 /* * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down - * so that it will fit. + * so that it will fit. We use hash_64 to convert the value to 31 bits, and + * then add 1, to ensure that we don't end up with a 0 as the value. */ +#if BITS_PER_LONG == 64 static inline ino_t cifs_uniqueid_to_ino_t(u64 fileid) { - ino_t ino = (ino_t) fileid; - if (sizeof(ino_t) < sizeof(u64)) - ino ^= fileid >> (sizeof(u64)-sizeof(ino_t)) * 8; - return ino; + return (ino_t)fileid; } +#else +static inline ino_t +cifs_uniqueid_to_ino_t(u64 fileid) +{ + return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1; +} +#endif extern struct file_system_type cifs_fs_type; extern const struct address_space_operations cifs_addr_ops; @@ -67,6 +75,8 @@ extern int cifs_revalidate_dentry_attr(struct dentry *); extern int cifs_revalidate_file(struct file *filp); extern int cifs_revalidate_dentry(struct dentry *); extern int cifs_invalidate_mapping(struct inode *inode); +extern int cifs_revalidate_mapping(struct inode *inode); +extern int cifs_zap_mapping(struct inode *inode); extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int cifs_setattr(struct dentry *, struct iattr *); @@ -130,5 +140,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.02" +#define CIFS_VERSION "2.03" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 30f6e9251a4a..de6aed8c78e5 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -559,6 +559,7 @@ struct TCP_Server_Info { int echo_credits; /* echo reserved slots */ int oplock_credits; /* oplock break reserved slots */ bool echoes:1; /* enable echoes */ + __u8 client_guid[SMB2_CLIENT_GUID_SIZE]; /* Client GUID */ #endif u16 dialect; /* dialect index that server chose */ bool oplocks:1; /* enable oplocks */ @@ -1113,12 +1114,13 @@ struct cifsInodeInfo { __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ unsigned int oplock; /* oplock/lease level we have */ unsigned int epoch; /* used to track lease state changes */ - bool delete_pending; /* DELETE_ON_CLOSE is set */ - bool invalid_mapping; /* pagecache is invalid */ - unsigned long flags; #define CIFS_INODE_PENDING_OPLOCK_BREAK (0) /* oplock break in progress */ #define CIFS_INODE_PENDING_WRITERS (1) /* Writes in progress */ #define CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2 (2) /* Downgrade oplock to L2 */ +#define CIFS_INO_DELETE_PENDING (3) /* delete pending on server */ +#define CIFS_INO_INVALID_MAPPING (4) /* pagecache is invalid */ +#define CIFS_INO_LOCK (5) /* lock bit for synchronization */ + unsigned long flags; spinlock_t writers_lock; unsigned int writers; /* Number of writers on this inode */ unsigned long time; /* jiffies of last update of inode */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 8813ff776ba3..20d75b8ddb26 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2144,6 +2144,9 @@ cifs_get_tcp_session(struct smb_vol *volume_info) sizeof(tcp_ses->srcaddr)); memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr, sizeof(tcp_ses->dstaddr)); +#ifdef CONFIG_CIFS_SMB2 + get_random_bytes(tcp_ses->client_guid, SMB2_CLIENT_GUID_SIZE); +#endif /* * at this point we are the only ones with the pointer * to the struct since the kernel thread not created yet @@ -2225,7 +2228,7 @@ static int match_session(struct cifs_ses *ses, struct smb_vol *vol) vol->username ? vol->username : "", CIFS_MAX_USERNAME_LEN)) return 0; - if (strlen(vol->username) != 0 && + if ((vol->username && strlen(vol->username) != 0) && ses->password != NULL && strncmp(ses->password, vol->password ? vol->password : "", diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5ed03e0b8b40..208f56eca4bf 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -335,7 +335,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, spin_unlock(&cifs_file_list_lock); if (fid->purge_cache) - cifs_invalidate_mapping(inode); + cifs_zap_mapping(inode); file->private_data = cfile; return cfile; @@ -392,7 +392,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) * again and get at least level II oplock. */ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) - CIFS_I(inode)->invalid_mapping = true; + set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags); cifs_set_oplock_level(cifsi, 0); } spin_unlock(&cifs_file_list_lock); @@ -1529,7 +1529,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, */ if (!CIFS_CACHE_WRITE(CIFS_I(inode)) && CIFS_CACHE_READ(CIFS_I(inode))) { - cifs_invalidate_mapping(inode); + cifs_zap_mapping(inode); cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n", inode); CIFS_I(inode)->oplock = 0; @@ -2218,7 +2218,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, file->f_path.dentry->d_name.name, datasync); if (!CIFS_CACHE_READ(CIFS_I(inode))) { - rc = cifs_invalidate_mapping(inode); + rc = cifs_zap_mapping(inode); if (rc) { cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc); rc = 0; /* don't care about it in fsync */ @@ -2562,7 +2562,7 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos); if (written > 0) { - CIFS_I(inode)->invalid_mapping = true; + set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags); iocb->ki_pos = pos; } @@ -2649,7 +2649,7 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, * request comes - break it on the client to prevent reading * an old data. */ - cifs_invalidate_mapping(inode); + cifs_zap_mapping(inode); cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n", inode); cinode->oplock = 0; @@ -3112,7 +3112,7 @@ int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) xid = get_xid(); if (!CIFS_CACHE_READ(CIFS_I(inode))) { - rc = cifs_invalidate_mapping(inode); + rc = cifs_zap_mapping(inode); if (rc) return rc; } @@ -3670,7 +3670,7 @@ void cifs_oplock_break(struct work_struct *work) if (!CIFS_CACHE_READ(cinode)) { rc = filemap_fdatawait(inode->i_mapping); mapping_set_error(inode->i_mapping, rc); - cifs_invalidate_mapping(inode); + cifs_zap_mapping(inode); } cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc); } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index aadc2b68678b..a174605f6afa 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -22,6 +22,7 @@ #include <linux/stat.h> #include <linux/slab.h> #include <linux/pagemap.h> +#include <linux/freezer.h> #include <asm/div64.h> #include "cifsfs.h" #include "cifspdu.h" @@ -117,7 +118,7 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr) cifs_dbg(FYI, "%s: invalidating inode %llu mapping\n", __func__, cifs_i->uniqueid); - cifs_i->invalid_mapping = true; + set_bit(CIFS_INO_INVALID_MAPPING, &cifs_i->flags); } /* @@ -177,7 +178,10 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) else cifs_i->time = jiffies; - cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING; + if (fattr->cf_flags & CIFS_FATTR_DELETE_PENDING) + set_bit(CIFS_INO_DELETE_PENDING, &cifs_i->flags); + else + clear_bit(CIFS_INO_DELETE_PENDING, &cifs_i->flags); cifs_i->server_eof = fattr->cf_eof; /* @@ -1121,7 +1125,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, } /* try to set DELETE_ON_CLOSE */ - if (!cifsInode->delete_pending) { + if (!test_bit(CIFS_INO_DELETE_PENDING, &cifsInode->flags)) { rc = CIFSSMBSetFileDisposition(xid, tcon, true, fid.netfid, current->tgid); /* @@ -1138,7 +1142,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, rc = -EBUSY; goto undo_rename; } - cifsInode->delete_pending = true; + set_bit(CIFS_INO_DELETE_PENDING, &cifsInode->flags); } out_close: @@ -1737,6 +1741,9 @@ cifs_inode_needs_reval(struct inode *inode) if (cifs_i->time == 0) return true; + if (!cifs_sb->actimeo) + return true; + if (!time_in_range(jiffies, cifs_i->time, cifs_i->time + cifs_sb->actimeo)) return true; @@ -1756,23 +1763,62 @@ int cifs_invalidate_mapping(struct inode *inode) { int rc = 0; - struct cifsInodeInfo *cifs_i = CIFS_I(inode); - - cifs_i->invalid_mapping = false; if (inode->i_mapping && inode->i_mapping->nrpages != 0) { rc = invalidate_inode_pages2(inode->i_mapping); - if (rc) { + if (rc) cifs_dbg(VFS, "%s: could not invalidate inode %p\n", __func__, inode); - cifs_i->invalid_mapping = true; - } } cifs_fscache_reset_inode_cookie(inode); return rc; } +/** + * cifs_wait_bit_killable - helper for functions that are sleeping on bit locks + * @word: long word containing the bit lock + */ +static int +cifs_wait_bit_killable(void *word) +{ + if (fatal_signal_pending(current)) + return -ERESTARTSYS; + freezable_schedule_unsafe(); + return 0; +} + +int +cifs_revalidate_mapping(struct inode *inode) +{ + int rc; + unsigned long *flags = &CIFS_I(inode)->flags; + + rc = wait_on_bit_lock(flags, CIFS_INO_LOCK, cifs_wait_bit_killable, + TASK_KILLABLE); + if (rc) + return rc; + + if (test_and_clear_bit(CIFS_INO_INVALID_MAPPING, flags)) { + rc = cifs_invalidate_mapping(inode); + if (rc) + set_bit(CIFS_INO_INVALID_MAPPING, flags); + } + + clear_bit_unlock(CIFS_INO_LOCK, flags); + smp_mb__after_atomic(); + wake_up_bit(flags, CIFS_INO_LOCK); + + return rc; +} + +int +cifs_zap_mapping(struct inode *inode) +{ + set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags); + return cifs_revalidate_mapping(inode); +} + int cifs_revalidate_file_attr(struct file *filp) { int rc = 0; @@ -1839,9 +1885,7 @@ int cifs_revalidate_file(struct file *filp) if (rc) return rc; - if (CIFS_I(inode)->invalid_mapping) - rc = cifs_invalidate_mapping(inode); - return rc; + return cifs_revalidate_mapping(inode); } /* revalidate a dentry's inode attributes */ @@ -1854,9 +1898,7 @@ int cifs_revalidate_dentry(struct dentry *dentry) if (rc) return rc; - if (CIFS_I(inode)->invalid_mapping) - rc = cifs_invalidate_mapping(inode); - return rc; + return cifs_revalidate_mapping(inode); } int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry, diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 77492301cc2b..45cb59bcc791 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -85,7 +85,7 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, goto out_fput; } - src_inode = src_file.file->f_dentry->d_inode; + src_inode = file_inode(src_file.file); /* * Note: cifs case is easier than btrfs since server responsible for diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 049884552e76..6834b9c3bec1 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -795,8 +795,8 @@ cifs_print_status(__u32 status_code) while (nt_errs[idx].nt_errstr != NULL) { if (((nt_errs[idx].nt_errcode) & 0xFFFFFF) == (status_code & 0xFFFFFF)) { - printk(KERN_NOTICE "Status code returned 0x%08x %s\n", - status_code, nt_errs[idx].nt_errstr); + pr_notice("Status code returned 0x%08x %s\n", + status_code, nt_errs[idx].nt_errstr); } idx++; } @@ -941,8 +941,9 @@ cifs_UnixTimeToNT(struct timespec t) return (u64) t.tv_sec * 10000000 + t.tv_nsec/100 + NTFS_TIME_OFFSET; } -static int total_days_of_prev_months[] = -{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334}; +static const int total_days_of_prev_months[] = { + 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 +}; struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset) { diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 35ddc3ed119d..787844bde384 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1047,6 +1047,7 @@ smb2_create_lease_buf(u8 *lease_key, u8 oplock) buf->ccontext.NameOffset = cpu_to_le16(offsetof (struct create_lease, Name)); buf->ccontext.NameLength = cpu_to_le16(4); + /* SMB2_CREATE_REQUEST_LEASE is "RqLs" */ buf->Name[0] = 'R'; buf->Name[1] = 'q'; buf->Name[2] = 'L'; @@ -1073,6 +1074,7 @@ smb3_create_lease_buf(u8 *lease_key, u8 oplock) buf->ccontext.NameOffset = cpu_to_le16(offsetof (struct create_lease_v2, Name)); buf->ccontext.NameLength = cpu_to_le16(4); + /* SMB2_CREATE_REQUEST_LEASE is "RqLs" */ buf->Name[0] = 'R'; buf->Name[1] = 'q'; buf->Name[2] = 'L'; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 3802f8c94acc..b0b260dbb19d 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -375,7 +375,12 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) req->Capabilities = cpu_to_le32(ses->server->vals->req_capabilities); - memcpy(req->ClientGUID, cifs_client_guid, SMB2_CLIENT_GUID_SIZE); + /* ClientGUID must be zero for SMB2.02 dialect */ + if (ses->server->vals->protocol_id == SMB20_PROT_ID) + memset(req->ClientGUID, 0, SMB2_CLIENT_GUID_SIZE); + else + memcpy(req->ClientGUID, server->client_guid, + SMB2_CLIENT_GUID_SIZE); iov[0].iov_base = (char *)req; /* 4 for rfc1002 length field */ @@ -478,7 +483,8 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) vneg_inbuf.Capabilities = cpu_to_le32(tcon->ses->server->vals->req_capabilities); - memcpy(vneg_inbuf.Guid, cifs_client_guid, SMB2_CLIENT_GUID_SIZE); + memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid, + SMB2_CLIENT_GUID_SIZE); if (tcon->ses->sign) vneg_inbuf.SecurityMode = @@ -966,6 +972,7 @@ create_durable_buf(void) buf->ccontext.NameOffset = cpu_to_le16(offsetof (struct create_durable, Name)); buf->ccontext.NameLength = cpu_to_le16(4); + /* SMB2_CREATE_DURABLE_HANDLE_REQUEST is "DHnQ" */ buf->Name[0] = 'D'; buf->Name[1] = 'H'; buf->Name[2] = 'n'; @@ -990,6 +997,7 @@ create_reconnect_durable_buf(struct cifs_fid *fid) buf->ccontext.NameLength = cpu_to_le16(4); buf->Data.Fid.PersistentFileId = fid->persistent_fid; buf->Data.Fid.VolatileFileId = fid->volatile_fid; + /* SMB2_CREATE_DURABLE_HANDLE_RECONNECT is "DHnC" */ buf->Name[0] = 'D'; buf->Name[1] = 'H'; buf->Name[2] = 'n'; @@ -1089,6 +1097,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, int rc = 0; unsigned int num_iovecs = 2; __u32 file_attributes = 0; + char *dhc_buf = NULL, *lc_buf = NULL; cifs_dbg(FYI, "create/open\n"); @@ -1155,6 +1164,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, kfree(copy_path); return rc; } + lc_buf = iov[num_iovecs-1].iov_base; } if (*oplock == SMB2_OPLOCK_LEVEL_BATCH) { @@ -1169,9 +1179,10 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, if (rc) { cifs_small_buf_release(req); kfree(copy_path); - kfree(iov[num_iovecs-1].iov_base); + kfree(lc_buf); return rc; } + dhc_buf = iov[num_iovecs-1].iov_base; } rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); @@ -1203,6 +1214,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, *oplock = rsp->OplockLevel; creat_exit: kfree(copy_path); + kfree(lc_buf); + kfree(dhc_buf); free_rsp_buf(resp_buftype, rsp); return rc; } diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 2022c542ea3a..69f3595d3952 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -183,8 +183,6 @@ struct smb2_symlink_err_rsp { #define SMB2_CLIENT_GUID_SIZE 16 -extern __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE]; - struct smb2_negotiate_req { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 36 */ @@ -437,11 +435,15 @@ struct smb2_tree_disconnect_rsp { #define SMB2_CREATE_SD_BUFFER "SecD" /* security descriptor */ #define SMB2_CREATE_DURABLE_HANDLE_REQUEST "DHnQ" #define SMB2_CREATE_DURABLE_HANDLE_RECONNECT "DHnC" -#define SMB2_CREATE_ALLOCATION_SIZE "AlSi" +#define SMB2_CREATE_ALLOCATION_SIZE "AISi" #define SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST "MxAc" #define SMB2_CREATE_TIMEWARP_REQUEST "TWrp" #define SMB2_CREATE_QUERY_ON_DISK_ID "QFid" #define SMB2_CREATE_REQUEST_LEASE "RqLs" +#define SMB2_CREATE_DURABLE_HANDLE_REQUEST_V2 "DH2Q" +#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT_V2 "DH2C" +#define SMB2_CREATE_APP_INSTANCE_ID 0x45BCA66AEFA7F74A9008FA462E144D74 +#define SVHDX_OPEN_DEVICE_CONTEXT 0x83CE6F1AD851E0986E34401CC9BCFCE9 struct smb2_create_req { struct smb2_hdr hdr; diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c index 911cf30d057d..7740b1c871c1 100644 --- a/fs/coda/cnode.c +++ b/fs/coda/cnode.c @@ -101,7 +101,7 @@ struct inode *coda_cnode_make(struct CodaFid *fid, struct super_block *sb) inode = coda_iget(sb, fid, &attr); if (IS_ERR(inode)) - printk("coda_cnode_make: coda_iget failed\n"); + pr_warn("%s: coda_iget failed\n", __func__); return inode; } @@ -137,7 +137,7 @@ struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb) unsigned long hash = coda_f2i(fid); if ( !sb ) { - printk("coda_fid_to_inode: no sb!\n"); + pr_warn("%s: no sb!\n", __func__); return NULL; } diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h index e7550cb9fb74..d42b725b1d21 100644 --- a/fs/coda/coda_linux.h +++ b/fs/coda/coda_linux.h @@ -12,6 +12,12 @@ #ifndef _LINUX_CODA_FS #define _LINUX_CODA_FS +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/param.h> #include <linux/mm.h> @@ -63,7 +69,7 @@ void coda_sysctl_clean(void); else \ ptr = (cast)vzalloc((unsigned long) size); \ if (!ptr) \ - printk("kernel malloc returns 0 at %s:%d\n", __FILE__, __LINE__); \ + pr_warn("kernel malloc returns 0 at %s:%d\n", __FILE__, __LINE__); \ } while (0) diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 5efbb5ee0adc..cd8a63238b11 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -102,7 +102,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, unsig int type = 0; if (length > CODA_MAXNAMLEN) { - printk(KERN_ERR "name too long: lookup, %s (%*s)\n", + pr_err("name too long: lookup, %s (%*s)\n", coda_i2s(dir), (int)length, name); return ERR_PTR(-ENAMETOOLONG); } @@ -453,23 +453,23 @@ static int coda_venus_readdir(struct file *coda_file, struct dir_context *ctx) ret = kernel_read(host_file, ctx->pos - 2, (char *)vdir, sizeof(*vdir)); if (ret < 0) { - printk(KERN_ERR "coda readdir: read dir %s failed %d\n", - coda_f2s(&cii->c_fid), ret); + pr_err("%s: read dir %s failed %d\n", + __func__, coda_f2s(&cii->c_fid), ret); break; } if (ret == 0) break; /* end of directory file reached */ /* catch truncated reads */ if (ret < vdir_size || ret < vdir_size + vdir->d_namlen) { - printk(KERN_ERR "coda readdir: short read on %s\n", - coda_f2s(&cii->c_fid)); + pr_err("%s: short read on %s\n", + __func__, coda_f2s(&cii->c_fid)); ret = -EBADF; break; } /* validate whether the directory file actually makes sense */ if (vdir->d_reclen < vdir_size + vdir->d_namlen) { - printk(KERN_ERR "coda readdir: invalid dir %s\n", - coda_f2s(&cii->c_fid)); + pr_err("%s: invalid dir %s\n", + __func__, coda_f2s(&cii->c_fid)); ret = -EBADF; break; } @@ -589,8 +589,8 @@ int coda_revalidate_inode(struct inode *inode) coda_vattr_to_iattr(inode, &attr); if ((old_mode & S_IFMT) != (inode->i_mode & S_IFMT)) { - printk("Coda: inode %ld, fid %s changed type!\n", - inode->i_ino, coda_f2s(&(cii->c_fid))); + pr_warn("inode %ld, fid %s changed type!\n", + inode->i_ino, coda_f2s(&(cii->c_fid))); } /* the following can happen when a local fid is replaced diff --git a/fs/coda/inode.c b/fs/coda/inode.c index d9c7751f10ac..fe3afb2de880 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -119,12 +119,12 @@ static int get_device_index(struct coda_mount_data *data) int idx; if (data == NULL) { - printk("coda_read_super: Bad mount data\n"); + pr_warn("%s: Bad mount data\n", __func__); return -1; } if (data->version != CODA_MOUNT_VERSION) { - printk("coda_read_super: Bad mount version\n"); + pr_warn("%s: Bad mount version\n", __func__); return -1; } @@ -141,13 +141,13 @@ static int get_device_index(struct coda_mount_data *data) fdput(f); if (idx < 0 || idx >= MAX_CODADEVS) { - printk("coda_read_super: Bad minor number\n"); + pr_warn("%s: Bad minor number\n", __func__); return -1; } return idx; Ebadf: - printk("coda_read_super: Bad file\n"); + pr_warn("%s: Bad file\n", __func__); return -1; } @@ -168,19 +168,19 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) if(idx == -1) idx = 0; - printk(KERN_INFO "coda_read_super: device index: %i\n", idx); + pr_info("%s: device index: %i\n", __func__, idx); vc = &coda_comms[idx]; mutex_lock(&vc->vc_mutex); if (!vc->vc_inuse) { - printk("coda_read_super: No pseudo device\n"); + pr_warn("%s: No pseudo device\n", __func__); error = -EINVAL; goto unlock_out; } if (vc->vc_sb) { - printk("coda_read_super: Device already mounted\n"); + pr_warn("%s: Device already mounted\n", __func__); error = -EBUSY; goto unlock_out; } @@ -204,22 +204,23 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) /* get root fid from Venus: this needs the root inode */ error = venus_rootfid(sb, &fid); if ( error ) { - printk("coda_read_super: coda_get_rootfid failed with %d\n", - error); + pr_warn("%s: coda_get_rootfid failed with %d\n", + __func__, error); goto error; } - printk("coda_read_super: rootfid is %s\n", coda_f2s(&fid)); + pr_info("%s: rootfid is %s\n", __func__, coda_f2s(&fid)); /* make root inode */ root = coda_cnode_make(&fid, sb); if (IS_ERR(root)) { error = PTR_ERR(root); - printk("Failure of coda_cnode_make for root: error %d\n", error); + pr_warn("Failure of coda_cnode_make for root: error %d\n", + error); goto error; } - printk("coda_read_super: rootinode is %ld dev %s\n", - root->i_ino, root->i_sb->s_id); + pr_info("%s: rootinode is %ld dev %s\n", + __func__, root->i_ino, root->i_sb->s_id); sb->s_root = d_make_root(root); if (!sb->s_root) { error = -EINVAL; @@ -246,7 +247,7 @@ static void coda_put_super(struct super_block *sb) sb->s_fs_info = NULL; mutex_unlock(&vcp->vc_mutex); - printk("Coda: Bye bye.\n"); + pr_info("Bye bye.\n"); } static void coda_evict_inode(struct inode *inode) diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index ebc2bae6c289..5c1e4242368b 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -114,14 +114,14 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf, int size = sizeof(*dcbuf); if ( nbytes < sizeof(struct coda_out_hdr) ) { - printk("coda_downcall opc %d uniq %d, not enough!\n", - hdr.opcode, hdr.unique); + pr_warn("coda_downcall opc %d uniq %d, not enough!\n", + hdr.opcode, hdr.unique); count = nbytes; goto out; } if ( nbytes > size ) { - printk("Coda: downcall opc %d, uniq %d, too much!", - hdr.opcode, hdr.unique); + pr_warn("downcall opc %d, uniq %d, too much!", + hdr.opcode, hdr.unique); nbytes = size; } CODA_ALLOC(dcbuf, union outputArgs *, nbytes); @@ -136,7 +136,8 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf, CODA_FREE(dcbuf, nbytes); if (error) { - printk("psdev_write: coda_downcall error: %d\n", error); + pr_warn("%s: coda_downcall error: %d\n", + __func__, error); retval = error; goto out; } @@ -157,16 +158,17 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf, mutex_unlock(&vcp->vc_mutex); if (!req) { - printk("psdev_write: msg (%d, %d) not found\n", - hdr.opcode, hdr.unique); + pr_warn("%s: msg (%d, %d) not found\n", + __func__, hdr.opcode, hdr.unique); retval = -ESRCH; goto out; } /* move data into response buffer. */ if (req->uc_outSize < nbytes) { - printk("psdev_write: too much cnt: %d, cnt: %ld, opc: %d, uniq: %d.\n", - req->uc_outSize, (long)nbytes, hdr.opcode, hdr.unique); + pr_warn("%s: too much cnt: %d, cnt: %ld, opc: %d, uniq: %d.\n", + __func__, req->uc_outSize, (long)nbytes, + hdr.opcode, hdr.unique); nbytes = req->uc_outSize; /* don't have more space! */ } if (copy_from_user(req->uc_data, buf, nbytes)) { @@ -240,8 +242,8 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf, /* Move the input args into userspace */ count = req->uc_inSize; if (nbytes < req->uc_inSize) { - printk ("psdev_read: Venus read %ld bytes of %d in message\n", - (long)nbytes, req->uc_inSize); + pr_warn("%s: Venus read %ld bytes of %d in message\n", + __func__, (long)nbytes, req->uc_inSize); count = nbytes; } @@ -305,7 +307,7 @@ static int coda_psdev_release(struct inode * inode, struct file * file) struct upc_req *req, *tmp; if (!vcp || !vcp->vc_inuse ) { - printk("psdev_release: Not open.\n"); + pr_warn("%s: Not open.\n", __func__); return -1; } @@ -354,8 +356,8 @@ static int init_coda_psdev(void) { int i, err = 0; if (register_chrdev(CODA_PSDEV_MAJOR, "coda", &coda_psdev_fops)) { - printk(KERN_ERR "coda_psdev: unable to get major %d\n", - CODA_PSDEV_MAJOR); + pr_err("%s: unable to get major %d\n", + __func__, CODA_PSDEV_MAJOR); return -EIO; } coda_psdev_class = class_create(THIS_MODULE, "coda"); @@ -393,13 +395,13 @@ static int __init init_coda(void) goto out2; status = init_coda_psdev(); if ( status ) { - printk("Problem (%d) in init_coda_psdev\n", status); + pr_warn("Problem (%d) in init_coda_psdev\n", status); goto out1; } status = register_filesystem(&coda_fs_type); if (status) { - printk("coda: failed to register filesystem!\n"); + pr_warn("failed to register filesystem!\n"); goto out; } return 0; @@ -420,9 +422,8 @@ static void __exit exit_coda(void) int err, i; err = unregister_filesystem(&coda_fs_type); - if ( err != 0 ) { - printk("coda: failed to unregister filesystem\n"); - } + if (err != 0) + pr_warn("failed to unregister filesystem\n"); for (i = 0; i < MAX_CODADEVS; i++) device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); class_destroy(coda_psdev_class); diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c index af56ad56a89a..34218a8a28cd 100644 --- a/fs/coda/sysctl.c +++ b/fs/coda/sysctl.c @@ -14,7 +14,7 @@ #ifdef CONFIG_SYSCTL static struct ctl_table_header *fs_table_header; -static ctl_table coda_table[] = { +static struct ctl_table coda_table[] = { { .procname = "timeout", .data = &coda_timeout, @@ -39,7 +39,7 @@ static ctl_table coda_table[] = { {} }; -static ctl_table fs_table[] = { +static struct ctl_table fs_table[] = { { .procname = "coda", .mode = 0555, diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index 3a731976dc5e..21fcf8dcb9cd 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -508,8 +508,8 @@ int venus_pioctl(struct super_block *sb, struct CodaFid *fid, inp->coda_ioctl.data = (char *)(INSIZE(ioctl)); /* get the data out of user space */ - if ( copy_from_user((char*)inp + (long)inp->coda_ioctl.data, - data->vi.in, data->vi.in_size) ) { + if (copy_from_user((char *)inp + (long)inp->coda_ioctl.data, + data->vi.in, data->vi.in_size)) { error = -EINVAL; goto exit; } @@ -518,8 +518,8 @@ int venus_pioctl(struct super_block *sb, struct CodaFid *fid, &outsize, inp); if (error) { - printk("coda_pioctl: Venus returns: %d for %s\n", - error, coda_f2s(fid)); + pr_warn("%s: Venus returns: %d for %s\n", + __func__, error, coda_f2s(fid)); goto exit; } @@ -675,7 +675,7 @@ static int coda_upcall(struct venus_comm *vcp, mutex_lock(&vcp->vc_mutex); if (!vcp->vc_inuse) { - printk(KERN_NOTICE "coda: Venus dead, not sending upcall\n"); + pr_notice("Venus dead, not sending upcall\n"); error = -ENXIO; goto exit; } @@ -725,7 +725,7 @@ static int coda_upcall(struct venus_comm *vcp, error = -EINTR; if ((req->uc_flags & CODA_REQ_ABORT) || !signal_pending(current)) { - printk(KERN_WARNING "coda: Unexpected interruption.\n"); + pr_warn("Unexpected interruption.\n"); goto exit; } @@ -735,7 +735,7 @@ static int coda_upcall(struct venus_comm *vcp, /* Venus saw the upcall, make sure we can send interrupt signal */ if (!vcp->vc_inuse) { - printk(KERN_INFO "coda: Venus dead, not sending signal.\n"); + pr_info("Venus dead, not sending signal.\n"); goto exit; } diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index b5f0a3b91f18..bd4a3c167091 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -24,6 +24,12 @@ * configfs Copyright (C) 2005 Oracle. All rights reserved. */ +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/slab.h> #include <linux/list.h> #include <linux/spinlock.h> diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index e081acbac2e7..668dcabc5695 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -940,9 +940,9 @@ static void client_drop_item(struct config_item *parent_item, #ifdef DEBUG static void configfs_dump_one(struct configfs_dirent *sd, int level) { - printk(KERN_INFO "%*s\"%s\":\n", level, " ", configfs_get_name(sd)); + pr_info("%*s\"%s\":\n", level, " ", configfs_get_name(sd)); -#define type_print(_type) if (sd->s_type & _type) printk(KERN_INFO "%*s %s\n", level, " ", #_type); +#define type_print(_type) if (sd->s_type & _type) pr_info("%*s %s\n", level, " ", #_type); type_print(CONFIGFS_ROOT); type_print(CONFIGFS_DIR); type_print(CONFIGFS_ITEM_ATTR); @@ -1699,7 +1699,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) struct dentry *root = dentry->d_sb->s_root; if (dentry->d_parent != root) { - printk(KERN_ERR "configfs: Tried to unregister non-subsystem!\n"); + pr_err("Tried to unregister non-subsystem!\n"); return; } @@ -1709,7 +1709,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) mutex_lock(&configfs_symlink_mutex); spin_lock(&configfs_dirent_lock); if (configfs_detach_prep(dentry, NULL)) { - printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n"); + pr_err("Tried to unregister non-empty subsystem!\n"); } spin_unlock(&configfs_dirent_lock); mutex_unlock(&configfs_symlink_mutex); diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index a9d35b0e06cf..5946ad98053f 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -168,9 +168,8 @@ static void configfs_set_inode_lock_class(struct configfs_dirent *sd, * In practice the maximum level of locking depth is * already reached. Just inform about possible reasons. */ - printk(KERN_INFO "configfs: Too many levels of inodes" - " for the locking correctness validator.\n"); - printk(KERN_INFO "Spurious warnings may appear.\n"); + pr_info("Too many levels of inodes for the locking correctness validator.\n"); + pr_info("Spurious warnings may appear.\n"); } } } diff --git a/fs/configfs/item.c b/fs/configfs/item.c index 50cee7f9110b..e65f9ffbb999 100644 --- a/fs/configfs/item.c +++ b/fs/configfs/item.c @@ -19,7 +19,7 @@ * Boston, MA 021110-1307, USA. * * Based on kobject: - * kobject is Copyright (c) 2002-2003 Patrick Mochel + * kobject is Copyright (c) 2002-2003 Patrick Mochel * * configfs Copyright (C) 2005 Oracle. All rights reserved. * @@ -35,9 +35,9 @@ #include <linux/configfs.h> -static inline struct config_item * to_item(struct list_head * entry) +static inline struct config_item *to_item(struct list_head *entry) { - return container_of(entry,struct config_item,ci_entry); + return container_of(entry, struct config_item, ci_entry); } /* Evil kernel */ @@ -47,34 +47,35 @@ static void config_item_release(struct kref *kref); * config_item_init - initialize item. * @item: item in question. */ -void config_item_init(struct config_item * item) +void config_item_init(struct config_item *item) { kref_init(&item->ci_kref); INIT_LIST_HEAD(&item->ci_entry); } +EXPORT_SYMBOL(config_item_init); /** * config_item_set_name - Set the name of an item * @item: item. - * @name: name. + * @fmt: The vsnprintf()'s format string. * * If strlen(name) >= CONFIGFS_ITEM_NAME_LEN, then use a * dynamically allocated string that @item->ci_name points to. * Otherwise, use the static @item->ci_namebuf array. */ -int config_item_set_name(struct config_item * item, const char * fmt, ...) +int config_item_set_name(struct config_item *item, const char *fmt, ...) { int error = 0; int limit = CONFIGFS_ITEM_NAME_LEN; int need; va_list args; - char * name; + char *name; /* * First, try the static array */ - va_start(args,fmt); - need = vsnprintf(item->ci_namebuf,limit,fmt,args); + va_start(args, fmt); + need = vsnprintf(item->ci_namebuf, limit, fmt, args); va_end(args); if (need < limit) name = item->ci_namebuf; @@ -83,13 +84,13 @@ int config_item_set_name(struct config_item * item, const char * fmt, ...) * Need more space? Allocate it and try again */ limit = need + 1; - name = kmalloc(limit,GFP_KERNEL); + name = kmalloc(limit, GFP_KERNEL); if (!name) { error = -ENOMEM; goto Done; } - va_start(args,fmt); - need = vsnprintf(name,limit,fmt,args); + va_start(args, fmt); + need = vsnprintf(name, limit, fmt, args); va_end(args); /* Still? Give up. */ @@ -109,7 +110,6 @@ int config_item_set_name(struct config_item * item, const char * fmt, ...) Done: return error; } - EXPORT_SYMBOL(config_item_set_name); void config_item_init_type_name(struct config_item *item, @@ -131,20 +131,21 @@ void config_group_init_type_name(struct config_group *group, const char *name, } EXPORT_SYMBOL(config_group_init_type_name); -struct config_item * config_item_get(struct config_item * item) +struct config_item *config_item_get(struct config_item *item) { if (item) kref_get(&item->ci_kref); return item; } +EXPORT_SYMBOL(config_item_get); -static void config_item_cleanup(struct config_item * item) +static void config_item_cleanup(struct config_item *item) { - struct config_item_type * t = item->ci_type; - struct config_group * s = item->ci_group; - struct config_item * parent = item->ci_parent; + struct config_item_type *t = item->ci_type; + struct config_group *s = item->ci_group; + struct config_item *parent = item->ci_parent; - pr_debug("config_item %s: cleaning up\n",config_item_name(item)); + pr_debug("config_item %s: cleaning up\n", config_item_name(item)); if (item->ci_name != item->ci_namebuf) kfree(item->ci_name); item->ci_name = NULL; @@ -167,21 +168,23 @@ static void config_item_release(struct kref *kref) * * Decrement the refcount, and if 0, call config_item_cleanup(). */ -void config_item_put(struct config_item * item) +void config_item_put(struct config_item *item) { if (item) kref_put(&item->ci_kref, config_item_release); } +EXPORT_SYMBOL(config_item_put); /** * config_group_init - initialize a group for use - * @k: group + * @group: config_group */ void config_group_init(struct config_group *group) { config_item_init(&group->cg_item); INIT_LIST_HEAD(&group->cg_children); } +EXPORT_SYMBOL(config_group_init); /** * config_group_find_item - search for item in group. @@ -195,11 +198,11 @@ void config_group_init(struct config_group *group) struct config_item *config_group_find_item(struct config_group *group, const char *name) { - struct list_head * entry; - struct config_item * ret = NULL; + struct list_head *entry; + struct config_item *ret = NULL; - list_for_each(entry,&group->cg_children) { - struct config_item * item = to_item(entry); + list_for_each(entry, &group->cg_children) { + struct config_item *item = to_item(entry); if (config_item_name(item) && !strcmp(config_item_name(item), name)) { ret = config_item_get(item); @@ -208,9 +211,4 @@ struct config_item *config_group_find_item(struct config_group *group, } return ret; } - -EXPORT_SYMBOL(config_item_init); -EXPORT_SYMBOL(config_group_init); -EXPORT_SYMBOL(config_item_get); -EXPORT_SYMBOL(config_item_put); EXPORT_SYMBOL(config_group_find_item); diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index 7f26c3cf75ae..f6c285833390 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -85,7 +85,7 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent) /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); } else { - pr_debug("configfs: could not get root inode\n"); + pr_debug("could not get root inode\n"); return -ENOMEM; } @@ -155,7 +155,7 @@ static int __init configfs_init(void) return 0; out4: - printk(KERN_ERR "configfs: Unable to register filesystem!\n"); + pr_err("Unable to register filesystem!\n"); configfs_inode_exit(); out3: kobject_put(config_kobj); diff --git a/fs/dcache.c b/fs/dcache.c index 42ae01eefc07..1792d6075b4f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -150,7 +150,7 @@ static long get_nr_dentry_unused(void) return sum < 0 ? 0 : sum; } -int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, +int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { dentry_stat.nr_dentry = get_nr_dentry(); @@ -441,42 +441,12 @@ void d_drop(struct dentry *dentry) } EXPORT_SYMBOL(d_drop); -/* - * Finish off a dentry we've decided to kill. - * dentry->d_lock must be held, returns with it unlocked. - * If ref is non-zero, then decrement the refcount too. - * Returns dentry requiring refcount drop, or NULL if we're done. - */ -static struct dentry * -dentry_kill(struct dentry *dentry, int unlock_on_failure) - __releases(dentry->d_lock) +static void __dentry_kill(struct dentry *dentry) { - struct inode *inode; struct dentry *parent = NULL; bool can_free = true; - - if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { - can_free = dentry->d_flags & DCACHE_MAY_FREE; - spin_unlock(&dentry->d_lock); - goto out; - } - - inode = dentry->d_inode; - if (inode && !spin_trylock(&inode->i_lock)) { -relock: - if (unlock_on_failure) { - spin_unlock(&dentry->d_lock); - cpu_relax(); - } - return dentry; /* try again with same dentry */ - } if (!IS_ROOT(dentry)) parent = dentry->d_parent; - if (parent && !spin_trylock(&parent->d_lock)) { - if (inode) - spin_unlock(&inode->i_lock); - goto relock; - } /* * The dentry is now unrecoverably dead to the world. @@ -520,9 +490,72 @@ relock: can_free = false; } spin_unlock(&dentry->d_lock); -out: if (likely(can_free)) dentry_free(dentry); +} + +/* + * Finish off a dentry we've decided to kill. + * dentry->d_lock must be held, returns with it unlocked. + * If ref is non-zero, then decrement the refcount too. + * Returns dentry requiring refcount drop, or NULL if we're done. + */ +static struct dentry *dentry_kill(struct dentry *dentry) + __releases(dentry->d_lock) +{ + struct inode *inode = dentry->d_inode; + struct dentry *parent = NULL; + + if (inode && unlikely(!spin_trylock(&inode->i_lock))) + goto failed; + + if (!IS_ROOT(dentry)) { + parent = dentry->d_parent; + if (unlikely(!spin_trylock(&parent->d_lock))) { + if (inode) + spin_unlock(&inode->i_lock); + goto failed; + } + } + + __dentry_kill(dentry); + return parent; + +failed: + spin_unlock(&dentry->d_lock); + cpu_relax(); + return dentry; /* try again with same dentry */ +} + +static inline struct dentry *lock_parent(struct dentry *dentry) +{ + struct dentry *parent = dentry->d_parent; + if (IS_ROOT(dentry)) + return NULL; + if (likely(spin_trylock(&parent->d_lock))) + return parent; + spin_unlock(&dentry->d_lock); + rcu_read_lock(); +again: + parent = ACCESS_ONCE(dentry->d_parent); + spin_lock(&parent->d_lock); + /* + * We can't blindly lock dentry until we are sure + * that we won't violate the locking order. + * Any changes of dentry->d_parent must have + * been done with parent->d_lock held, so + * spin_lock() above is enough of a barrier + * for checking if it's still our child. + */ + if (unlikely(parent != dentry->d_parent)) { + spin_unlock(&parent->d_lock); + goto again; + } + rcu_read_unlock(); + if (parent != dentry) + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + else + parent = NULL; return parent; } @@ -579,7 +612,7 @@ repeat: return; kill_it: - dentry = dentry_kill(dentry, 1); + dentry = dentry_kill(dentry); if (dentry) goto repeat; } @@ -797,8 +830,11 @@ static void shrink_dentry_list(struct list_head *list) struct dentry *dentry, *parent; while (!list_empty(list)) { + struct inode *inode; dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); + parent = lock_parent(dentry); + /* * The dispose list is isolated and dentries are not accounted * to the LRU here, so we can simply remove it from the list @@ -812,26 +848,33 @@ static void shrink_dentry_list(struct list_head *list) */ if ((int)dentry->d_lockref.count > 0) { spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); continue; } - parent = dentry_kill(dentry, 0); - /* - * If dentry_kill returns NULL, we have nothing more to do. - */ - if (!parent) + + if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { + bool can_free = dentry->d_flags & DCACHE_MAY_FREE; + spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + if (can_free) + dentry_free(dentry); continue; + } - if (unlikely(parent == dentry)) { - /* - * trylocks have failed and d_lock has been held the - * whole time, so it could not have been added to any - * other lists. Just add it back to the shrink list. - */ + inode = dentry->d_inode; + if (inode && unlikely(!spin_trylock(&inode->i_lock))) { d_shrink_add(dentry, list); spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); continue; } + + __dentry_kill(dentry); + /* * We need to prune ancestors too. This is necessary to prevent * quadratic behavior of shrink_dcache_parent(), but is also @@ -839,8 +882,26 @@ static void shrink_dentry_list(struct list_head *list) * fragmentation. */ dentry = parent; - while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) - dentry = dentry_kill(dentry, 1); + while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) { + parent = lock_parent(dentry); + if (dentry->d_lockref.count != 1) { + dentry->d_lockref.count--; + spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + break; + } + inode = dentry->d_inode; /* can't be NULL */ + if (unlikely(!spin_trylock(&inode->i_lock))) { + spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + cpu_relax(); + continue; + } + __dentry_kill(dentry); + dentry = parent; + } } } diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index c71038079b47..cfe8466f7fef 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -10,6 +10,8 @@ * * ------------------------------------------------------------------------- */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/init.h> #include <linux/fs.h> @@ -148,10 +150,10 @@ static inline struct super_block *pts_sb_from_inode(struct inode *inode) /* * parse_mount_options(): - * Set @opts to mount options specified in @data. If an option is not - * specified in @data, set it to its default value. The exception is - * 'newinstance' option which can only be set/cleared on a mount (i.e. - * cannot be changed during remount). + * Set @opts to mount options specified in @data. If an option is not + * specified in @data, set it to its default value. The exception is + * 'newinstance' option which can only be set/cleared on a mount (i.e. + * cannot be changed during remount). * * Note: @data may be NULL (in which case all options are set to default). */ @@ -225,7 +227,7 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) break; #endif default: - printk(KERN_ERR "devpts: called with bogus options\n"); + pr_err("called with bogus options\n"); return -EINVAL; } } @@ -261,7 +263,7 @@ static int mknod_ptmx(struct super_block *sb) dentry = d_alloc_name(root, "ptmx"); if (!dentry) { - printk(KERN_NOTICE "Unable to alloc dentry for ptmx node\n"); + pr_err("Unable to alloc dentry for ptmx node\n"); goto out; } @@ -270,7 +272,7 @@ static int mknod_ptmx(struct super_block *sb) */ inode = new_inode(sb); if (!inode) { - printk(KERN_ERR "Unable to alloc inode for ptmx node\n"); + pr_err("Unable to alloc inode for ptmx node\n"); dput(dentry); goto out; } @@ -303,7 +305,7 @@ static void update_ptmx_mode(struct pts_fs_info *fsi) #else static inline void update_ptmx_mode(struct pts_fs_info *fsi) { - return; + return; } #endif @@ -333,9 +335,11 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root) struct pts_mount_opts *opts = &fsi->mount_opts; if (opts->setuid) - seq_printf(seq, ",uid=%u", from_kuid_munged(&init_user_ns, opts->uid)); + seq_printf(seq, ",uid=%u", + from_kuid_munged(&init_user_ns, opts->uid)); if (opts->setgid) - seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, opts->gid)); + seq_printf(seq, ",gid=%u", + from_kgid_munged(&init_user_ns, opts->gid)); seq_printf(seq, ",mode=%03o", opts->mode); #ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode); @@ -396,7 +400,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent) if (s->s_root) return 0; - printk(KERN_ERR "devpts: get root dentry failed\n"); + pr_err("get root dentry failed\n"); fail: return -ENOMEM; diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 76feb4b60fa6..d521bddf876d 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -157,11 +157,13 @@ static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field, const char *buf, size_t len) { unsigned int x; + int rc; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - - x = simple_strtoul(buf, NULL, 0); + rc = kstrtouint(buf, 0, &x); + if (rc) + return rc; if (check_zero && !x) return -EINVAL; @@ -730,7 +732,10 @@ static ssize_t comm_nodeid_read(struct dlm_comm *cm, char *buf) static ssize_t comm_nodeid_write(struct dlm_comm *cm, const char *buf, size_t len) { - cm->nodeid = simple_strtol(buf, NULL, 0); + int rc = kstrtoint(buf, 0, &cm->nodeid); + + if (rc) + return rc; return len; } @@ -742,7 +747,10 @@ static ssize_t comm_local_read(struct dlm_comm *cm, char *buf) static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, size_t len) { - cm->local= simple_strtol(buf, NULL, 0); + int rc = kstrtoint(buf, 0, &cm->local); + + if (rc) + return rc; if (cm->local && !local_comm) local_comm = cm; return len; @@ -846,7 +854,10 @@ static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, size_t len) { uint32_t seq = 0; - nd->nodeid = simple_strtol(buf, NULL, 0); + int rc = kstrtoint(buf, 0, &nd->nodeid); + + if (rc) + return rc; dlm_comm_seq(nd->nodeid, &seq); nd->comm_seq = seq; return len; @@ -860,7 +871,10 @@ static ssize_t node_weight_read(struct dlm_node *nd, char *buf) static ssize_t node_weight_write(struct dlm_node *nd, const char *buf, size_t len) { - nd->weight = simple_strtol(buf, NULL, 0); + int rc = kstrtoint(buf, 0, &nd->weight); + + if (rc) + return rc; return len; } diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index b969deef9ebb..8d77ba7b1756 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -68,7 +68,7 @@ static int print_format1_lock(struct seq_file *s, struct dlm_lkb *lkb, if (lkb->lkb_wait_type) seq_printf(s, " wait_type: %d", lkb->lkb_wait_type); - return seq_printf(s, "\n"); + return seq_puts(s, "\n"); } static int print_format1(struct dlm_rsb *res, struct seq_file *s) @@ -92,31 +92,31 @@ static int print_format1(struct dlm_rsb *res, struct seq_file *s) } if (res->res_nodeid > 0) - rv = seq_printf(s, "\" \nLocal Copy, Master is node %d\n", + rv = seq_printf(s, "\"\nLocal Copy, Master is node %d\n", res->res_nodeid); else if (res->res_nodeid == 0) - rv = seq_printf(s, "\" \nMaster Copy\n"); + rv = seq_puts(s, "\"\nMaster Copy\n"); else if (res->res_nodeid == -1) - rv = seq_printf(s, "\" \nLooking up master (lkid %x)\n", + rv = seq_printf(s, "\"\nLooking up master (lkid %x)\n", res->res_first_lkid); else - rv = seq_printf(s, "\" \nInvalid master %d\n", + rv = seq_printf(s, "\"\nInvalid master %d\n", res->res_nodeid); if (rv) goto out; /* Print the LVB: */ if (res->res_lvbptr) { - seq_printf(s, "LVB: "); + seq_puts(s, "LVB: "); for (i = 0; i < lvblen; i++) { if (i == lvblen / 2) - seq_printf(s, "\n "); + seq_puts(s, "\n "); seq_printf(s, "%02x ", (unsigned char) res->res_lvbptr[i]); } if (rsb_flag(res, RSB_VALNOTVALID)) - seq_printf(s, " (INVALID)"); - rv = seq_printf(s, "\n"); + seq_puts(s, " (INVALID)"); + rv = seq_puts(s, "\n"); if (rv) goto out; } @@ -133,21 +133,21 @@ static int print_format1(struct dlm_rsb *res, struct seq_file *s) } /* Print the locks attached to this resource */ - seq_printf(s, "Granted Queue\n"); + seq_puts(s, "Granted Queue\n"); list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue) { rv = print_format1_lock(s, lkb, res); if (rv) goto out; } - seq_printf(s, "Conversion Queue\n"); + seq_puts(s, "Conversion Queue\n"); list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue) { rv = print_format1_lock(s, lkb, res); if (rv) goto out; } - seq_printf(s, "Waiting Queue\n"); + seq_puts(s, "Waiting Queue\n"); list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue) { rv = print_format1_lock(s, lkb, res); if (rv) @@ -157,13 +157,13 @@ static int print_format1(struct dlm_rsb *res, struct seq_file *s) if (list_empty(&res->res_lookup)) goto out; - seq_printf(s, "Lookup Queue\n"); + seq_puts(s, "Lookup Queue\n"); list_for_each_entry(lkb, &res->res_lookup, lkb_rsb_lookup) { rv = seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_rqmode)); if (lkb->lkb_wait_type) seq_printf(s, " wait_type: %d", lkb->lkb_wait_type); - rv = seq_printf(s, "\n"); + rv = seq_puts(s, "\n"); } out: unlock_rsb(res); @@ -300,7 +300,7 @@ static int print_format3(struct dlm_rsb *r, struct seq_file *s) else seq_printf(s, " %02x", (unsigned char)r->res_name[i]); } - rv = seq_printf(s, "\n"); + rv = seq_puts(s, "\n"); if (rv) goto out; @@ -311,7 +311,7 @@ static int print_format3(struct dlm_rsb *r, struct seq_file *s) for (i = 0; i < lvblen; i++) seq_printf(s, " %02x", (unsigned char)r->res_lvbptr[i]); - rv = seq_printf(s, "\n"); + rv = seq_puts(s, "\n"); if (rv) goto out; @@ -377,7 +377,7 @@ static int print_format4(struct dlm_rsb *r, struct seq_file *s) else seq_printf(s, " %02x", (unsigned char)r->res_name[i]); } - rv = seq_printf(s, "\n"); + rv = seq_puts(s, "\n"); out: unlock_rsb(r); return rv; diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 04d6398c1f1c..f3e72787e7f9 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -35,8 +35,11 @@ static struct task_struct * scand_task; static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len) { ssize_t ret = len; - int n = simple_strtol(buf, NULL, 0); + int n; + int rc = kstrtoint(buf, 0, &n); + if (rc) + return rc; ls = dlm_find_lockspace_local(ls->ls_local_handle); if (!ls) return -EINVAL; @@ -57,7 +60,10 @@ static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len) static ssize_t dlm_event_store(struct dlm_ls *ls, const char *buf, size_t len) { - ls->ls_uevent_result = simple_strtol(buf, NULL, 0); + int rc = kstrtoint(buf, 0, &ls->ls_uevent_result); + + if (rc) + return rc; set_bit(LSFL_UEVENT_WAIT, &ls->ls_flags); wake_up(&ls->ls_uevent_wait); return len; @@ -70,7 +76,10 @@ static ssize_t dlm_id_show(struct dlm_ls *ls, char *buf) static ssize_t dlm_id_store(struct dlm_ls *ls, const char *buf, size_t len) { - ls->ls_global_id = simple_strtoul(buf, NULL, 0); + int rc = kstrtouint(buf, 0, &ls->ls_global_id); + + if (rc) + return rc; return len; } @@ -81,7 +90,11 @@ static ssize_t dlm_nodir_show(struct dlm_ls *ls, char *buf) static ssize_t dlm_nodir_store(struct dlm_ls *ls, const char *buf, size_t len) { - int val = simple_strtoul(buf, NULL, 0); + int val; + int rc = kstrtoint(buf, 0, &val); + + if (rc) + return rc; if (val == 1) set_bit(LSFL_NODIR, &ls->ls_flags); return len; diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 9280202e488c..1de7294aad20 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -50,7 +50,7 @@ static void drop_slab(void) } while (nr_objects > 10); } -int drop_caches_sysctl_handler(ctl_table *table, int write, +int drop_caches_sysctl_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { int ret; diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index becc725a1953..0a48886e069c 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -83,7 +83,7 @@ static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr) return 0; } -static struct dentry_operations efivarfs_d_ops = { +static const struct dentry_operations efivarfs_d_ops = { .d_compare = efivarfs_d_compare, .d_hash = efivarfs_d_hash, .d_delete = always_delete_dentry, diff --git a/fs/efs/dir.c b/fs/efs/dir.c index b72307ccdf7a..ce63b24f7c3e 100644 --- a/fs/efs/dir.c +++ b/fs/efs/dir.c @@ -26,7 +26,8 @@ static int efs_readdir(struct file *file, struct dir_context *ctx) int slot; if (inode->i_size & (EFS_DIRBSIZE-1)) - printk(KERN_WARNING "EFS: WARNING: readdir(): directory size not a multiple of EFS_DIRBSIZE\n"); + pr_warn("%s(): directory size not a multiple of EFS_DIRBSIZE\n", + __func__); /* work out where this entry can be found */ block = ctx->pos >> EFS_DIRBSIZE_BITS; @@ -43,14 +44,15 @@ static int efs_readdir(struct file *file, struct dir_context *ctx) bh = sb_bread(inode->i_sb, efs_bmap(inode, block)); if (!bh) { - printk(KERN_ERR "EFS: readdir(): failed to read dir block %d\n", block); + pr_err("%s(): failed to read dir block %d\n", + __func__, block); break; } dirblock = (struct efs_dir *) bh->b_data; if (be16_to_cpu(dirblock->magic) != EFS_DIRBLK_MAGIC) { - printk(KERN_ERR "EFS: readdir(): invalid directory block\n"); + pr_err("%s(): invalid directory block\n", __func__); brelse(bh); break; } @@ -69,10 +71,9 @@ static int efs_readdir(struct file *file, struct dir_context *ctx) inodenum = be32_to_cpu(dirslot->inode); namelen = dirslot->namelen; nameptr = dirslot->name; - -#ifdef DEBUG - printk(KERN_DEBUG "EFS: readdir(): block %d slot %d/%d: inode %u, name \"%s\", namelen %u\n", block, slot, dirblock->slots-1, inodenum, nameptr, namelen); -#endif + pr_debug("%s(): block %d slot %d/%d: inode %u, name \"%s\", namelen %u\n", + __func__, block, slot, dirblock->slots-1, + inodenum, nameptr, namelen); if (!namelen) continue; /* found the next entry */ @@ -80,7 +81,8 @@ static int efs_readdir(struct file *file, struct dir_context *ctx) /* sanity check */ if (nameptr - (char *) dirblock + namelen > EFS_DIRBSIZE) { - printk(KERN_WARNING "EFS: directory entry %d exceeds directory block\n", slot); + pr_warn("directory entry %d exceeds directory block\n", + slot); continue; } diff --git a/fs/efs/efs.h b/fs/efs/efs.h index 5528926ac7f6..5bbf9612140c 100644 --- a/fs/efs/efs.h +++ b/fs/efs/efs.h @@ -7,6 +7,12 @@ #ifndef _EFS_EFS_H_ #define _EFS_EFS_H_ +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/fs.h> #include <asm/uaccess.h> diff --git a/fs/efs/file.c b/fs/efs/file.c index 1ccb364ffa63..a37dcee46866 100644 --- a/fs/efs/file.c +++ b/fs/efs/file.c @@ -22,10 +22,8 @@ int efs_get_block(struct inode *inode, sector_t iblock, /* * i have no idea why this happens as often as it does */ - printk(KERN_WARNING "EFS: bmap(): block %d >= %ld (filesize %ld)\n", - block, - inode->i_blocks, - inode->i_size); + pr_warn("%s(): block %d >= %ld (filesize %ld)\n", + __func__, block, inode->i_blocks, inode->i_size); #endif return 0; } @@ -38,7 +36,7 @@ int efs_get_block(struct inode *inode, sector_t iblock, int efs_bmap(struct inode *inode, efs_block_t block) { if (block < 0) { - printk(KERN_WARNING "EFS: bmap(): block < 0\n"); + pr_warn("%s(): block < 0\n", __func__); return 0; } @@ -48,10 +46,8 @@ int efs_bmap(struct inode *inode, efs_block_t block) { /* * i have no idea why this happens as often as it does */ - printk(KERN_WARNING "EFS: bmap(): block %d >= %ld (filesize %ld)\n", - block, - inode->i_blocks, - inode->i_size); + pr_warn("%s(): block %d >= %ld (filesize %ld)\n", + __func__, block, inode->i_blocks, inode->i_size); #endif return 0; } diff --git a/fs/efs/inode.c b/fs/efs/inode.c index d15ccf20f1b3..079d20306ee1 100644 --- a/fs/efs/inode.c +++ b/fs/efs/inode.c @@ -89,7 +89,7 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino) bh = sb_bread(inode->i_sb, block); if (!bh) { - printk(KERN_WARNING "EFS: bread() failed at block %d\n", block); + pr_warn("%s() failed at block %d\n", __func__, block); goto read_inode_error; } @@ -130,19 +130,16 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino) for(i = 0; i < EFS_DIRECTEXTENTS; i++) { extent_copy(&(efs_inode->di_u.di_extents[i]), &(in->extents[i])); if (i < in->numextents && in->extents[i].cooked.ex_magic != 0) { - printk(KERN_WARNING "EFS: extent %d has bad magic number in inode %lu\n", i, inode->i_ino); + pr_warn("extent %d has bad magic number in inode %lu\n", + i, inode->i_ino); brelse(bh); goto read_inode_error; } } brelse(bh); - -#ifdef DEBUG - printk(KERN_DEBUG "EFS: efs_iget(): inode %lu, extents %d, mode %o\n", - inode->i_ino, in->numextents, inode->i_mode); -#endif - + pr_debug("efs_iget(): inode %lu, extents %d, mode %o\n", + inode->i_ino, in->numextents, inode->i_mode); switch (inode->i_mode & S_IFMT) { case S_IFDIR: inode->i_op = &efs_dir_inode_operations; @@ -162,7 +159,7 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino) init_special_inode(inode, inode->i_mode, device); break; default: - printk(KERN_WARNING "EFS: unsupported inode mode %o\n", inode->i_mode); + pr_warn("unsupported inode mode %o\n", inode->i_mode); goto read_inode_error; break; } @@ -171,7 +168,7 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino) return inode; read_inode_error: - printk(KERN_WARNING "EFS: failed to read inode %lu\n", inode->i_ino); + pr_warn("failed to read inode %lu\n", inode->i_ino); iget_failed(inode); return ERR_PTR(-EIO); } @@ -216,7 +213,7 @@ efs_block_t efs_map_block(struct inode *inode, efs_block_t block) { /* if we only have one extent then nothing can be found */ if (in->numextents == 1) { - printk(KERN_ERR "EFS: map_block() failed to map (1 extent)\n"); + pr_err("%s() failed to map (1 extent)\n", __func__); return 0; } @@ -234,13 +231,12 @@ efs_block_t efs_map_block(struct inode *inode, efs_block_t block) { } } - printk(KERN_ERR "EFS: map_block() failed to map block %u (dir)\n", block); + pr_err("%s() failed to map block %u (dir)\n", __func__, block); return 0; } -#ifdef DEBUG - printk(KERN_DEBUG "EFS: map_block(): indirect search for logical block %u\n", block); -#endif + pr_debug("%s(): indirect search for logical block %u\n", + __func__, block); direxts = in->extents[0].cooked.ex_offset; indexts = in->numextents; @@ -262,7 +258,8 @@ efs_block_t efs_map_block(struct inode *inode, efs_block_t block) { if (dirext == direxts) { /* should never happen */ - printk(KERN_ERR "EFS: couldn't find direct extent for indirect extent %d (block %u)\n", cur, block); + pr_err("couldn't find direct extent for indirect extent %d (block %u)\n", + cur, block); if (bh) brelse(bh); return 0; } @@ -279,12 +276,12 @@ efs_block_t efs_map_block(struct inode *inode, efs_block_t block) { bh = sb_bread(inode->i_sb, iblock); if (!bh) { - printk(KERN_ERR "EFS: bread() failed at block %d\n", iblock); + pr_err("%s() failed at block %d\n", + __func__, iblock); return 0; } -#ifdef DEBUG - printk(KERN_DEBUG "EFS: map_block(): read indirect extent block %d\n", iblock); -#endif + pr_debug("%s(): read indirect extent block %d\n", + __func__, iblock); first = 0; lastblock = iblock; } @@ -294,7 +291,8 @@ efs_block_t efs_map_block(struct inode *inode, efs_block_t block) { extent_copy(&(exts[ioffset]), &ext); if (ext.cooked.ex_magic != 0) { - printk(KERN_ERR "EFS: extent %d has bad magic number in block %d\n", cur, iblock); + pr_err("extent %d has bad magic number in block %d\n", + cur, iblock); if (bh) brelse(bh); return 0; } @@ -306,7 +304,7 @@ efs_block_t efs_map_block(struct inode *inode, efs_block_t block) { } } if (bh) brelse(bh); - printk(KERN_ERR "EFS: map_block() failed to map block %u (indir)\n", block); + pr_err("%s() failed to map block %u (indir)\n", __func__, block); return 0; } diff --git a/fs/efs/namei.c b/fs/efs/namei.c index 96f66d213a19..356c044e2cd3 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c @@ -23,20 +23,22 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len) efs_block_t block; if (inode->i_size & (EFS_DIRBSIZE-1)) - printk(KERN_WARNING "EFS: WARNING: find_entry(): directory size not a multiple of EFS_DIRBSIZE\n"); + pr_warn("%s(): directory size not a multiple of EFS_DIRBSIZE\n", + __func__); for(block = 0; block < inode->i_blocks; block++) { bh = sb_bread(inode->i_sb, efs_bmap(inode, block)); if (!bh) { - printk(KERN_ERR "EFS: find_entry(): failed to read dir block %d\n", block); + pr_err("%s(): failed to read dir block %d\n", + __func__, block); return 0; } dirblock = (struct efs_dir *) bh->b_data; if (be16_to_cpu(dirblock->magic) != EFS_DIRBLK_MAGIC) { - printk(KERN_ERR "EFS: find_entry(): invalid directory block\n"); + pr_err("%s(): invalid directory block\n", __func__); brelse(bh); return(0); } diff --git a/fs/efs/super.c b/fs/efs/super.c index 3befcc9f5d63..7fca462ea4e3 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -134,7 +134,7 @@ static const struct export_operations efs_export_ops = { static int __init init_efs_fs(void) { int err; - printk("EFS: "EFS_VERSION" - http://aeschi.ch.eu.org/efs/\n"); + pr_info(EFS_VERSION" - http://aeschi.ch.eu.org/efs/\n"); err = init_inodecache(); if (err) goto out1; @@ -179,12 +179,12 @@ static efs_block_t efs_validate_vh(struct volume_header *vh) { csum += be32_to_cpu(cs); } if (csum) { - printk(KERN_INFO "EFS: SGI disklabel: checksum bad, label corrupted\n"); + pr_warn("SGI disklabel: checksum bad, label corrupted\n"); return 0; } #ifdef DEBUG - printk(KERN_DEBUG "EFS: bf: \"%16s\"\n", vh->vh_bootfile); + pr_debug("bf: \"%16s\"\n", vh->vh_bootfile); for(i = 0; i < NVDIR; i++) { int j; @@ -196,9 +196,8 @@ static efs_block_t efs_validate_vh(struct volume_header *vh) { name[j] = (char) 0; if (name[0]) { - printk(KERN_DEBUG "EFS: vh: %8s block: 0x%08x size: 0x%08x\n", - name, - (int) be32_to_cpu(vh->vh_vd[i].vd_lbn), + pr_debug("vh: %8s block: 0x%08x size: 0x%08x\n", + name, (int) be32_to_cpu(vh->vh_vd[i].vd_lbn), (int) be32_to_cpu(vh->vh_vd[i].vd_nbytes)); } } @@ -211,12 +210,11 @@ static efs_block_t efs_validate_vh(struct volume_header *vh) { } #ifdef DEBUG if (be32_to_cpu(vh->vh_pt[i].pt_nblks)) { - printk(KERN_DEBUG "EFS: pt %2d: start: %08d size: %08d type: 0x%02x (%s)\n", - i, - (int) be32_to_cpu(vh->vh_pt[i].pt_firstlbn), - (int) be32_to_cpu(vh->vh_pt[i].pt_nblks), - pt_type, - (pt_entry->pt_name) ? pt_entry->pt_name : "unknown"); + pr_debug("pt %2d: start: %08d size: %08d type: 0x%02x (%s)\n", + i, (int)be32_to_cpu(vh->vh_pt[i].pt_firstlbn), + (int)be32_to_cpu(vh->vh_pt[i].pt_nblks), + pt_type, (pt_entry->pt_name) ? + pt_entry->pt_name : "unknown"); } #endif if (IS_EFS(pt_type)) { @@ -226,11 +224,10 @@ static efs_block_t efs_validate_vh(struct volume_header *vh) { } if (slice == -1) { - printk(KERN_NOTICE "EFS: partition table contained no EFS partitions\n"); + pr_notice("partition table contained no EFS partitions\n"); #ifdef DEBUG } else { - printk(KERN_INFO "EFS: using slice %d (type %s, offset 0x%x)\n", - slice, + pr_info("using slice %d (type %s, offset 0x%x)\n", slice, (pt_entry->pt_name) ? pt_entry->pt_name : "unknown", sblock); #endif @@ -268,7 +265,7 @@ static int efs_fill_super(struct super_block *s, void *d, int silent) s->s_magic = EFS_SUPER_MAGIC; if (!sb_set_blocksize(s, EFS_BLOCKSIZE)) { - printk(KERN_ERR "EFS: device does not support %d byte blocks\n", + pr_err("device does not support %d byte blocks\n", EFS_BLOCKSIZE); return -EINVAL; } @@ -277,7 +274,7 @@ static int efs_fill_super(struct super_block *s, void *d, int silent) bh = sb_bread(s, 0); if (!bh) { - printk(KERN_ERR "EFS: cannot read volume header\n"); + pr_err("cannot read volume header\n"); return -EINVAL; } @@ -295,13 +292,14 @@ static int efs_fill_super(struct super_block *s, void *d, int silent) bh = sb_bread(s, sb->fs_start + EFS_SUPER); if (!bh) { - printk(KERN_ERR "EFS: cannot read superblock\n"); + pr_err("cannot read superblock\n"); return -EINVAL; } if (efs_validate_super(sb, (struct efs_super *) bh->b_data)) { #ifdef DEBUG - printk(KERN_WARNING "EFS: invalid superblock at block %u\n", sb->fs_start + EFS_SUPER); + pr_warn("invalid superblock at block %u\n", + sb->fs_start + EFS_SUPER); #endif brelse(bh); return -EINVAL; @@ -310,7 +308,7 @@ static int efs_fill_super(struct super_block *s, void *d, int silent) if (!(s->s_flags & MS_RDONLY)) { #ifdef DEBUG - printk(KERN_INFO "EFS: forcing read-only mode\n"); + pr_info("forcing read-only mode\n"); #endif s->s_flags |= MS_RDONLY; } @@ -318,13 +316,13 @@ static int efs_fill_super(struct super_block *s, void *d, int silent) s->s_export_op = &efs_export_ops; root = efs_iget(s, EFS_ROOTINODE); if (IS_ERR(root)) { - printk(KERN_ERR "EFS: get root inode failed\n"); + pr_err("get root inode failed\n"); return PTR_ERR(root); } s->s_root = d_make_root(root); if (!(s->s_root)) { - printk(KERN_ERR "EFS: get root dentry failed\n"); + pr_err("get root dentry failed\n"); return -ENOMEM; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index af903128891c..b73e0621ce9e 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -293,7 +293,7 @@ static LIST_HEAD(tfile_check_list); static long zero; static long long_max = LONG_MAX; -ctl_table epoll_table[] = { +struct ctl_table epoll_table[] = { { .procname = "max_user_watches", .data = &max_user_watches, diff --git a/fs/exec.c b/fs/exec.c index 476f3ebf437e..238b7aa26f68 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -657,10 +657,10 @@ int setup_arg_pages(struct linux_binprm *bprm, unsigned long rlim_stack; #ifdef CONFIG_STACK_GROWSUP - /* Limit stack size to 1GB */ + /* Limit stack size */ stack_base = rlimit_max(RLIMIT_STACK); - if (stack_base > (1 << 30)) - stack_base = 1 << 30; + if (stack_base > STACK_SIZE_MAX) + stack_base = STACK_SIZE_MAX; /* Make sure we didn't let the argument array grow too large. */ if (vma->vm_end - vma->vm_start > stack_base) diff --git a/fs/exofs/Kconfig.ore b/fs/exofs/Kconfig.ore index 1ca7fb7b6ba8..2daf2329c28d 100644 --- a/fs/exofs/Kconfig.ore +++ b/fs/exofs/Kconfig.ore @@ -9,4 +9,6 @@ config ORE tristate depends on EXOFS_FS || PNFS_OBJLAYOUT select ASYNC_XOR + select RAID6_PQ + select ASYNC_PQ default SCSI_OSD_ULD diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index dae884694bd9..cfc0205d62c4 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -58,9 +58,12 @@ int ore_verify_layout(unsigned total_comps, struct ore_layout *layout) layout->parity = 1; break; case PNFS_OSD_RAID_PQ: + layout->parity = 2; + break; case PNFS_OSD_RAID_4: default: - ORE_ERR("Only RAID_0/5 for now\n"); + ORE_ERR("Only RAID_0/5/6 for now received-enum=%d\n", + layout->raid_algorithm); return -EINVAL; } if (0 != (layout->stripe_unit & ~PAGE_MASK)) { @@ -112,6 +115,8 @@ int ore_verify_layout(unsigned total_comps, struct ore_layout *layout) layout->max_io_length /= stripe_length; layout->max_io_length *= stripe_length; } + ORE_DBGMSG("max_io_length=0x%lx\n", layout->max_io_length); + return 0; } EXPORT_SYMBOL(ore_verify_layout); @@ -545,21 +550,24 @@ void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset, /* "H - (N * U)" is just "H % U" so it's bound to u32 */ u32 C = (u32)(H - (N * U)) / stripe_unit + G * group_width; + u32 first_dev = C - C % group_width; div_u64_rem(file_offset, stripe_unit, &si->unit_off); si->obj_offset = si->unit_off + (N * stripe_unit) + (M * group_depth * stripe_unit); + si->cur_comp = C - first_dev; + si->cur_pg = si->unit_off / PAGE_SIZE; if (parity) { u32 LCMdP = lcm(group_width, parity) / parity; /* R = N % LCMdP; */ u32 RxP = (N % LCMdP) * parity; - u32 first_dev = C - C % group_width; si->par_dev = (group_width + group_width - parity - RxP) % group_width + first_dev; - si->dev = (group_width + C - RxP) % group_width + first_dev; + si->dev = (group_width + group_width + C - RxP) % + group_width + first_dev; si->bytes_in_stripe = U; si->first_stripe_start = M * S + G * T + N * U; } else { @@ -649,6 +657,43 @@ out: /* we fail the complete unit on an error eg don't advance return ret; } +static int _add_parity_units(struct ore_io_state *ios, + struct ore_striping_info *si, + unsigned dev, unsigned first_dev, + unsigned mirrors_p1, unsigned devs_in_group, + unsigned cur_len) +{ + unsigned do_parity; + int ret = 0; + + for (do_parity = ios->layout->parity; do_parity; --do_parity) { + struct ore_per_dev_state *per_dev; + + per_dev = &ios->per_dev[dev - first_dev]; + if (!per_dev->length && !per_dev->offset) { + /* Only/always the parity unit of the first + * stripe will be empty. So this is a chance to + * initialize the per_dev info. + */ + per_dev->dev = dev; + per_dev->offset = si->obj_offset - si->unit_off; + } + + ret = _ore_add_parity_unit(ios, si, per_dev, cur_len, + do_parity == 1); + if (unlikely(ret)) + break; + + if (do_parity != 1) { + dev = ((dev + mirrors_p1) % devs_in_group) + first_dev; + si->cur_comp = (si->cur_comp + 1) % + ios->layout->group_width; + } + } + + return ret; +} + static int _prepare_for_striping(struct ore_io_state *ios) { struct ore_striping_info *si = &ios->si; @@ -658,7 +703,6 @@ static int _prepare_for_striping(struct ore_io_state *ios) unsigned devs_in_group = group_width * mirrors_p1; unsigned dev = si->dev; unsigned first_dev = dev - (dev % devs_in_group); - unsigned dev_order; unsigned cur_pg = ios->pages_consumed; u64 length = ios->length; int ret = 0; @@ -670,16 +714,13 @@ static int _prepare_for_striping(struct ore_io_state *ios) BUG_ON(length > si->length); - dev_order = _dev_order(devs_in_group, mirrors_p1, si->par_dev, dev); - si->cur_comp = dev_order; - si->cur_pg = si->unit_off / PAGE_SIZE; - while (length) { - unsigned comp = dev - first_dev; - struct ore_per_dev_state *per_dev = &ios->per_dev[comp]; + struct ore_per_dev_state *per_dev = + &ios->per_dev[dev - first_dev]; unsigned cur_len, page_off = 0; - if (!per_dev->length) { + if (!per_dev->length && !per_dev->offset) { + /* First time initialize the per_dev info. */ per_dev->dev = dev; if (dev == si->dev) { WARN_ON(dev == si->par_dev); @@ -688,13 +729,7 @@ static int _prepare_for_striping(struct ore_io_state *ios) page_off = si->unit_off & ~PAGE_MASK; BUG_ON(page_off && (page_off != ios->pgbase)); } else { - if (si->cur_comp > dev_order) - per_dev->offset = - si->obj_offset - si->unit_off; - else /* si->cur_comp < dev_order */ - per_dev->offset = - si->obj_offset + stripe_unit - - si->unit_off; + per_dev->offset = si->obj_offset - si->unit_off; cur_len = stripe_unit; } } else { @@ -708,11 +743,9 @@ static int _prepare_for_striping(struct ore_io_state *ios) if (unlikely(ret)) goto out; - dev += mirrors_p1; - dev = (dev % devs_in_group) + first_dev; - length -= cur_len; + dev = ((dev + mirrors_p1) % devs_in_group) + first_dev; si->cur_comp = (si->cur_comp + 1) % group_width; if (unlikely((dev == si->par_dev) || (!length && ios->sp2d))) { if (!length && ios->sp2d) { @@ -720,23 +753,16 @@ static int _prepare_for_striping(struct ore_io_state *ios) * stripe. then operate on parity dev. */ dev = si->par_dev; - } - if (ios->sp2d) - /* In writes cur_len just means if it's the - * last one. See _ore_add_parity_unit. - */ - cur_len = length; - per_dev = &ios->per_dev[dev - first_dev]; - if (!per_dev->length) { - /* Only/always the parity unit of the first - * stripe will be empty. So this is a chance to - * initialize the per_dev info. - */ - per_dev->dev = dev; - per_dev->offset = si->obj_offset - si->unit_off; + /* If last stripe operate on parity comp */ + si->cur_comp = group_width - ios->layout->parity; } - ret = _ore_add_parity_unit(ios, si, per_dev, cur_len); + /* In writes cur_len just means if it's the + * last one. See _ore_add_parity_unit. + */ + ret = _add_parity_units(ios, si, dev, first_dev, + mirrors_p1, devs_in_group, + ios->sp2d ? length : cur_len); if (unlikely(ret)) goto out; @@ -747,6 +773,8 @@ static int _prepare_for_striping(struct ore_io_state *ios) /* Next stripe, start fresh */ si->cur_comp = 0; si->cur_pg = 0; + si->obj_offset += cur_len; + si->unit_off = 0; } } out: diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index 4e2c032ab8a1..7f20f25c232c 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c @@ -218,22 +218,28 @@ static unsigned _sp2d_max_pg(struct __stripe_pages_2d *sp2d) static void _gen_xor_unit(struct __stripe_pages_2d *sp2d) { unsigned p; + unsigned tx_flags = ASYNC_TX_ACK; + + if (sp2d->parity == 1) + tx_flags |= ASYNC_TX_XOR_ZERO_DST; + for (p = 0; p < sp2d->pages_in_unit; p++) { struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; if (!_1ps->write_count) continue; - init_async_submit(&_1ps->submit, - ASYNC_TX_XOR_ZERO_DST | ASYNC_TX_ACK, - NULL, - NULL, NULL, - (addr_conv_t *)_1ps->scribble); - - /* TODO: raid6 */ - _1ps->tx = async_xor(_1ps->pages[sp2d->data_devs], _1ps->pages, - 0, sp2d->data_devs, PAGE_SIZE, - &_1ps->submit); + init_async_submit(&_1ps->submit, tx_flags, + NULL, NULL, NULL, (addr_conv_t *)_1ps->scribble); + + if (sp2d->parity == 1) + _1ps->tx = async_xor(_1ps->pages[sp2d->data_devs], + _1ps->pages, 0, sp2d->data_devs, + PAGE_SIZE, &_1ps->submit); + else /* parity == 2 */ + _1ps->tx = async_gen_syndrome(_1ps->pages, 0, + sp2d->data_devs + sp2d->parity, + PAGE_SIZE, &_1ps->submit); } for (p = 0; p < sp2d->pages_in_unit; p++) { @@ -404,9 +410,8 @@ static int _add_to_r4w_last_page(struct ore_io_state *ios, u64 *offset) ore_calc_stripe_info(ios->layout, *offset, 0, &si); - p = si.unit_off / PAGE_SIZE; - c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, - ios->layout->mirrors_p1, si.par_dev, si.dev); + p = si.cur_pg; + c = si.cur_comp; page = ios->sp2d->_1p_stripes[p].pages[c]; pg_len = PAGE_SIZE - (si.unit_off % PAGE_SIZE); @@ -534,9 +539,8 @@ static int _read_4_write_last_stripe(struct ore_io_state *ios) goto read_it; ore_calc_stripe_info(ios->layout, offset, 0, &read_si); - p = read_si.unit_off / PAGE_SIZE; - c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, - ios->layout->mirrors_p1, read_si.par_dev, read_si.dev); + p = read_si.cur_pg; + c = read_si.cur_comp; if (min_p == sp2d->pages_in_unit) { /* Didn't do it yet */ @@ -620,7 +624,7 @@ static int _read_4_write_execute(struct ore_io_state *ios) int _ore_add_parity_unit(struct ore_io_state *ios, struct ore_striping_info *si, struct ore_per_dev_state *per_dev, - unsigned cur_len) + unsigned cur_len, bool do_xor) { if (ios->reading) { if (per_dev->cur_sg >= ios->sgs_per_dev) { @@ -640,17 +644,16 @@ int _ore_add_parity_unit(struct ore_io_state *ios, si->cur_pg = _sp2d_min_pg(sp2d); num_pages = _sp2d_max_pg(sp2d) + 1 - si->cur_pg; - if (!cur_len) /* If last stripe operate on parity comp */ - si->cur_comp = sp2d->data_devs; - if (!per_dev->length) { per_dev->offset += si->cur_pg * PAGE_SIZE; /* If first stripe, Read in all read4write pages * (if needed) before we calculate the first parity. */ - _read_4_write_first_stripe(ios); + if (do_xor) + _read_4_write_first_stripe(ios); } - if (!cur_len) /* If last stripe r4w pages of last stripe */ + if (!cur_len && do_xor) + /* If last stripe r4w pages of last stripe */ _read_4_write_last_stripe(ios); _read_4_write_execute(ios); @@ -662,7 +665,7 @@ int _ore_add_parity_unit(struct ore_io_state *ios, ++(ios->cur_par_page); } - BUG_ON(si->cur_comp != sp2d->data_devs); + BUG_ON(si->cur_comp < sp2d->data_devs); BUG_ON(si->cur_pg + num_pages > sp2d->pages_in_unit); ret = _ore_add_stripe_unit(ios, &array_start, 0, pages, @@ -670,9 +673,10 @@ int _ore_add_parity_unit(struct ore_io_state *ios, if (unlikely(ret)) return ret; - /* TODO: raid6 if (last_parity_dev) */ - _gen_xor_unit(sp2d); - _sp2d_reset(sp2d, ios->r4w, ios->private); + if (do_xor) { + _gen_xor_unit(sp2d); + _sp2d_reset(sp2d, ios->r4w, ios->private); + } } return 0; } diff --git a/fs/exofs/ore_raid.h b/fs/exofs/ore_raid.h index 2ffd2c3c6e46..cf6375d82129 100644 --- a/fs/exofs/ore_raid.h +++ b/fs/exofs/ore_raid.h @@ -31,24 +31,6 @@ #define ORE_DBGMSG2(M...) do {} while (0) /* #define ORE_DBGMSG2 ORE_DBGMSG */ -/* Calculate the component order in a stripe. eg the logical data unit - * address within the stripe of @dev given the @par_dev of this stripe. - */ -static inline unsigned _dev_order(unsigned devs_in_group, unsigned mirrors_p1, - unsigned par_dev, unsigned dev) -{ - unsigned first_dev = dev - dev % devs_in_group; - - dev -= first_dev; - par_dev -= first_dev; - - if (devs_in_group == par_dev) /* The raid 0 case */ - return dev / mirrors_p1; - /* raid4/5/6 case */ - return ((devs_in_group + dev - par_dev - mirrors_p1) % devs_in_group) / - mirrors_p1; -} - /* ios_raid.c stuff needed by ios.c */ int _ore_post_alloc_raid_stuff(struct ore_io_state *ios); void _ore_free_raid_stuff(struct ore_io_state *ios); @@ -56,7 +38,8 @@ void _ore_free_raid_stuff(struct ore_io_state *ios); void _ore_add_sg_seg(struct ore_per_dev_state *per_dev, unsigned cur_len, bool not_last); int _ore_add_parity_unit(struct ore_io_state *ios, struct ore_striping_info *si, - struct ore_per_dev_state *per_dev, unsigned cur_len); + struct ore_per_dev_state *per_dev, unsigned cur_len, + bool do_xor); void _ore_add_stripe_page(struct __stripe_pages_2d *sp2d, struct ore_striping_info *si, struct page *page); static inline void _add_stripe_page(struct __stripe_pages_2d *sp2d, diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 48a359dd286e..b01fbfb51f43 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -259,7 +259,7 @@ static int filldir_one(void * __buf, const char * name, int len, /** * get_name - default export_operations->get_name function - * @dentry: the directory in which to find a name + * @path: the directory in which to find a name * @name: a pointer to a %NAME_MAX+1 char buffer to store the name * @child: the dentry for the child directory. * @@ -337,7 +337,7 @@ out: /** * export_encode_fh - default export_operations->encode_fh function * @inode: the object to encode - * @fh: where to store the file handle fragment + * @fid: where to store the file handle fragment * @max_len: maximum length to store there * @parent: parent directory inode, if wanted * diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 5c56785007e0..0762d143e252 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -83,9 +83,9 @@ static inline int ext4_block_in_group(struct super_block *sb, /* Return the number of clusters used for file system metadata; this * represents the overhead needed by the file system. */ -unsigned ext4_num_overhead_clusters(struct super_block *sb, - ext4_group_t block_group, - struct ext4_group_desc *gdp) +static unsigned ext4_num_overhead_clusters(struct super_block *sb, + ext4_group_t block_group, + struct ext4_group_desc *gdp) { unsigned num_clusters; int block_cluster = -1, inode_cluster = -1, itbl_cluster = -1, i, c; @@ -176,9 +176,10 @@ static unsigned int num_clusters_in_group(struct super_block *sb, } /* Initializes an uninitialized block bitmap */ -void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, - ext4_group_t block_group, - struct ext4_group_desc *gdp) +static void ext4_init_block_bitmap(struct super_block *sb, + struct buffer_head *bh, + ext4_group_t block_group, + struct ext4_group_desc *gdp) { unsigned int bit, bit_max; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -307,6 +308,7 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, ext4_group_t block_group, struct buffer_head *bh) { + struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_grpblk_t offset; ext4_grpblk_t next_zero_bit; ext4_fsblk_t blk; @@ -326,14 +328,14 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, /* check whether block bitmap block number is set */ blk = ext4_block_bitmap(sb, desc); offset = blk - group_first_block; - if (!ext4_test_bit(offset, bh->b_data)) + if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data)) /* bad block bitmap */ return blk; /* check whether the inode bitmap block number is set */ blk = ext4_inode_bitmap(sb, desc); offset = blk - group_first_block; - if (!ext4_test_bit(offset, bh->b_data)) + if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data)) /* bad block bitmap */ return blk; @@ -341,18 +343,19 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, blk = ext4_inode_table(sb, desc); offset = blk - group_first_block; next_zero_bit = ext4_find_next_zero_bit(bh->b_data, - offset + EXT4_SB(sb)->s_itb_per_group, - offset); - if (next_zero_bit < offset + EXT4_SB(sb)->s_itb_per_group) + EXT4_B2C(sbi, offset + EXT4_SB(sb)->s_itb_per_group), + EXT4_B2C(sbi, offset)); + if (next_zero_bit < + EXT4_B2C(sbi, offset + EXT4_SB(sb)->s_itb_per_group)) /* bad bitmap for inode tables */ return blk; return 0; } -void ext4_validate_block_bitmap(struct super_block *sb, - struct ext4_group_desc *desc, - ext4_group_t block_group, - struct buffer_head *bh) +static void ext4_validate_block_bitmap(struct super_block *sb, + struct ext4_group_desc *desc, + ext4_group_t block_group, + struct buffer_head *bh) { ext4_fsblk_t blk; struct ext4_group_info *grp = ext4_get_group_info(sb, block_group); @@ -708,16 +711,6 @@ static inline int test_root(ext4_group_t a, int b) } } -static int ext4_group_sparse(ext4_group_t group) -{ - if (group <= 1) - return 1; - if (!(group & 1)) - return 0; - return (test_root(group, 7) || test_root(group, 5) || - test_root(group, 3)); -} - /** * ext4_bg_has_super - number of blocks used by the superblock in group * @sb: superblock for filesystem @@ -728,11 +721,26 @@ static int ext4_group_sparse(ext4_group_t group) */ int ext4_bg_has_super(struct super_block *sb, ext4_group_t group) { - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && - !ext4_group_sparse(group)) + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + + if (group == 0) + return 1; + if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_SPARSE_SUPER2)) { + if (group == le32_to_cpu(es->s_backup_bgs[0]) || + group == le32_to_cpu(es->s_backup_bgs[1])) + return 1; return 0; - return 1; + } + if ((group <= 1) || !EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) + return 1; + if (!(group & 1)) + return 0; + if (test_root(group, 3) || (test_root(group, 5)) || + test_root(group, 7)) + return 1; + + return 0; } static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index d638c57e996e..ef1bed66c14f 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -105,7 +105,7 @@ int __ext4_check_dir_entry(const char *function, unsigned int line, static int ext4_readdir(struct file *file, struct dir_context *ctx) { unsigned int offset; - int i, stored; + int i; struct ext4_dir_entry_2 *de; int err; struct inode *inode = file_inode(file); @@ -133,7 +133,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) return ret; } - stored = 0; offset = ctx->pos & (sb->s_blocksize - 1); while (ctx->pos < inode->i_size) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 66946aa62127..1479e2ae00d2 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -158,7 +158,6 @@ struct ext4_allocation_request { #define EXT4_MAP_MAPPED (1 << BH_Mapped) #define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten) #define EXT4_MAP_BOUNDARY (1 << BH_Boundary) -#define EXT4_MAP_UNINIT (1 << BH_Uninit) /* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of * ext4_map_blocks wants to know whether or not the underlying cluster has * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that @@ -169,7 +168,7 @@ struct ext4_allocation_request { #define EXT4_MAP_FROM_CLUSTER (1 << BH_AllocFromCluster) #define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\ - EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER) + EXT4_MAP_FROM_CLUSTER) struct ext4_map_blocks { ext4_fsblk_t m_pblk; @@ -184,7 +183,7 @@ struct ext4_map_blocks { #define EXT4_IO_END_UNWRITTEN 0x0001 /* - * For converting uninitialized extents on a work queue. 'handle' is used for + * For converting unwritten extents on a work queue. 'handle' is used for * buffered writeback. */ typedef struct ext4_io_end { @@ -537,26 +536,26 @@ enum { /* * Flags used by ext4_map_blocks() */ - /* Allocate any needed blocks and/or convert an unitialized + /* Allocate any needed blocks and/or convert an unwritten extent to be an initialized ext4 */ #define EXT4_GET_BLOCKS_CREATE 0x0001 - /* Request the creation of an unitialized extent */ -#define EXT4_GET_BLOCKS_UNINIT_EXT 0x0002 -#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\ + /* Request the creation of an unwritten extent */ +#define EXT4_GET_BLOCKS_UNWRIT_EXT 0x0002 +#define EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT (EXT4_GET_BLOCKS_UNWRIT_EXT|\ EXT4_GET_BLOCKS_CREATE) /* Caller is from the delayed allocation writeout path * finally doing the actual allocation of delayed blocks */ #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 /* caller is from the direct IO path, request to creation of an - unitialized extents if not allocated, split the uninitialized + unwritten extents if not allocated, split the unwritten extent if blocks has been preallocated already*/ #define EXT4_GET_BLOCKS_PRE_IO 0x0008 #define EXT4_GET_BLOCKS_CONVERT 0x0010 #define EXT4_GET_BLOCKS_IO_CREATE_EXT (EXT4_GET_BLOCKS_PRE_IO|\ - EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) + EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT) /* Convert extent to initialized after IO complete */ #define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ - EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) + EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT) /* Eventual metadata allocation (due to growing extent tree) * should not fail, so try to use reserved blocks for that.*/ #define EXT4_GET_BLOCKS_METADATA_NOFAIL 0x0020 @@ -876,6 +875,8 @@ struct ext4_inode_info { struct inode vfs_inode; struct jbd2_inode *jinode; + spinlock_t i_raw_lock; /* protects updates to the raw inode */ + /* * File creation time. Its function is same as that of * struct timespec i_{a,c,m}time in the generic inode. @@ -1159,7 +1160,8 @@ struct ext4_super_block { __le32 s_usr_quota_inum; /* inode for tracking user quota */ __le32 s_grp_quota_inum; /* inode for tracking group quota */ __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */ - __le32 s_reserved[108]; /* Padding to the end of the block */ + __le32 s_backup_bgs[2]; /* groups with sparse_super2 SBs */ + __le32 s_reserved[106]; /* Padding to the end of the block */ __le32 s_checksum; /* crc32c(superblock) */ }; @@ -1505,6 +1507,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) #define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008 #define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010 #define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020 +#define EXT4_FEATURE_COMPAT_SPARSE_SUPER2 0x0200 #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 @@ -1953,10 +1956,6 @@ extern void ext4_get_group_no_and_offset(struct super_block *sb, extern ext4_group_t ext4_get_group_number(struct super_block *sb, ext4_fsblk_t block); -extern void ext4_validate_block_bitmap(struct super_block *sb, - struct ext4_group_desc *desc, - ext4_group_t block_group, - struct buffer_head *bh); extern unsigned int ext4_block_group(struct super_block *sb, ext4_fsblk_t blocknr); extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, @@ -1985,16 +1984,9 @@ extern int ext4_wait_block_bitmap(struct super_block *sb, struct buffer_head *bh); extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group); -extern void ext4_init_block_bitmap(struct super_block *sb, - struct buffer_head *bh, - ext4_group_t group, - struct ext4_group_desc *desc); extern unsigned ext4_free_clusters_after_init(struct super_block *sb, ext4_group_t block_group, struct ext4_group_desc *gdp); -extern unsigned ext4_num_overhead_clusters(struct super_block *sb, - ext4_group_t block_group, - struct ext4_group_desc *gdp); ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); /* dir.c */ @@ -2137,8 +2129,6 @@ extern int ext4_alloc_da_blocks(struct inode *inode); extern void ext4_set_aops(struct inode *inode); extern int ext4_writepage_trans_blocks(struct inode *); extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); -extern int ext4_block_truncate_page(handle_t *handle, - struct address_space *mapping, loff_t from); extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, loff_t lstart, loff_t lend); extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); @@ -2198,8 +2188,6 @@ extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); /* super.c */ extern int ext4_calculate_overhead(struct super_block *sb); -extern int ext4_superblock_csum_verify(struct super_block *sb, - struct ext4_super_block *es); extern void ext4_superblock_csum_set(struct super_block *sb); extern void *ext4_kvmalloc(size_t size, gfp_t flags); extern void *ext4_kvzalloc(size_t size, gfp_t flags); @@ -2571,19 +2559,11 @@ extern const struct file_operations ext4_dir_operations; extern const struct inode_operations ext4_file_inode_operations; extern const struct file_operations ext4_file_operations; extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); -extern void ext4_unwritten_wait(struct inode *inode); /* inline.c */ extern int ext4_has_inline_data(struct inode *inode); -extern int ext4_get_inline_size(struct inode *inode); extern int ext4_get_max_inline_size(struct inode *inode); extern int ext4_find_inline_data_nolock(struct inode *inode); -extern void ext4_write_inline_data(struct inode *inode, - struct ext4_iloc *iloc, - void *buffer, loff_t pos, - unsigned int len); -extern int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, - unsigned int len); extern int ext4_init_inline_data(handle_t *handle, struct inode *inode, unsigned int len); extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode); @@ -2771,23 +2751,20 @@ extern void ext4_io_submit(struct ext4_io_submit *io); extern int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, int len, - struct writeback_control *wbc); + struct writeback_control *wbc, + bool keep_towrite); /* mmp.c */ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); -extern void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp); -extern int ext4_mmp_csum_verify(struct super_block *sb, - struct mmp_struct *mmp); /* * Note that these flags will never ever appear in a buffer_head's state flag. * See EXT4_MAP_... to see where this is used. */ enum ext4_state_bits { - BH_Uninit /* blocks are allocated but uninitialized on disk */ - = BH_JBDPrivateStart, - BH_AllocFromCluster, /* allocated blocks were part of already + BH_AllocFromCluster /* allocated blocks were part of already * allocated cluster. */ + = BH_JBDPrivateStart }; /* diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 5074fe23f19e..a867f5ca9991 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -137,21 +137,21 @@ struct ext4_ext_path { * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an * initialized extent. This is 2^15 and not (2^16 - 1), since we use the * MSB of ee_len field in the extent datastructure to signify if this - * particular extent is an initialized extent or an uninitialized (i.e. + * particular extent is an initialized extent or an unwritten (i.e. * preallocated). - * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an - * uninitialized extent. + * EXT_UNWRITTEN_MAX_LEN is the maximum number of blocks we can have in an + * unwritten extent. * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an - * uninitialized one. In other words, if MSB of ee_len is set, it is an - * uninitialized extent with only one special scenario when ee_len = 0x8000. - * In this case we can not have an uninitialized extent of zero length and + * unwritten one. In other words, if MSB of ee_len is set, it is an + * unwritten extent with only one special scenario when ee_len = 0x8000. + * In this case we can not have an unwritten extent of zero length and * thus we make it as a special case of initialized extent with 0x8000 length. * This way we get better extent-to-group alignment for initialized extents. * Hence, the maximum number of blocks we can have in an *initialized* - * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767). + * extent is 2^15 (32768) and in an *unwritten* extent is 2^15-1 (32767). */ #define EXT_INIT_MAX_LEN (1UL << 15) -#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1) +#define EXT_UNWRITTEN_MAX_LEN (EXT_INIT_MAX_LEN - 1) #define EXT_FIRST_EXTENT(__hdr__) \ @@ -187,14 +187,14 @@ static inline unsigned short ext_depth(struct inode *inode) return le16_to_cpu(ext_inode_hdr(inode)->eh_depth); } -static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext) +static inline void ext4_ext_mark_unwritten(struct ext4_extent *ext) { - /* We can not have an uninitialized extent of zero length! */ + /* We can not have an unwritten extent of zero length! */ BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0); ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN); } -static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext) +static inline int ext4_ext_is_unwritten(struct ext4_extent *ext) { /* Extent with ee_len of 0x8000 is treated as an initialized extent */ return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN); diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index c3fb607413ed..0074e0d23d6e 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -122,9 +122,10 @@ handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line, return handle; } -void ext4_journal_abort_handle(const char *caller, unsigned int line, - const char *err_fn, struct buffer_head *bh, - handle_t *handle, int err) +static void ext4_journal_abort_handle(const char *caller, unsigned int line, + const char *err_fn, + struct buffer_head *bh, + handle_t *handle, int err) { char nbuf[16]; const char *errstr = ext4_decode_error(NULL, err, nbuf); diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 81cfefa9dc0c..17c00ff202f2 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -231,10 +231,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); /* * Wrapper functions with which ext4 calls into JBD. */ -void ext4_journal_abort_handle(const char *caller, unsigned int line, - const char *err_fn, - struct buffer_head *bh, handle_t *handle, int err); - int __ext4_journal_get_write_access(const char *where, unsigned int line, handle_t *handle, struct buffer_head *bh); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 01b0c208f625..4da228a0e6d0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -50,8 +50,8 @@ */ #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ due to ENOSPC */ -#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ -#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ +#define EXT4_EXT_MARK_UNWRIT1 0x2 /* mark first half unwritten */ +#define EXT4_EXT_MARK_UNWRIT2 0x4 /* mark second half unwritten */ #define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */ #define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */ @@ -143,6 +143,7 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode, { if (path->p_bh) { /* path points to block */ + BUFFER_TRACE(path->p_bh, "get_write_access"); return ext4_journal_get_write_access(handle, path->p_bh); } /* path points to leaf/index in inode body */ @@ -524,7 +525,7 @@ __read_extent_tree_block(const char *function, unsigned int line, lblk - prev, ~0, EXTENT_STATUS_HOLE); - if (ext4_ext_is_uninitialized(ex)) + if (ext4_ext_is_unwritten(ex)) status = EXTENT_STATUS_UNWRITTEN; ext4_es_cache_extent(inode, lblk, len, ext4_ext_pblock(ex), status); @@ -620,7 +621,7 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) } else if (path->p_ext) { ext_debug(" %d:[%d]%d:%llu ", le32_to_cpu(path->p_ext->ee_block), - ext4_ext_is_uninitialized(path->p_ext), + ext4_ext_is_unwritten(path->p_ext), ext4_ext_get_actual_len(path->p_ext), ext4_ext_pblock(path->p_ext)); } else @@ -646,7 +647,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), - ext4_ext_is_uninitialized(ex), + ext4_ext_is_unwritten(ex), ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); } ext_debug("\n"); @@ -677,7 +678,7 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex), - ext4_ext_is_uninitialized(ex), + ext4_ext_is_unwritten(ex), ext4_ext_get_actual_len(ex), newblock); ex++; @@ -802,7 +803,7 @@ ext4_ext_binsearch(struct inode *inode, ext_debug(" -> %d:%llu:[%d]%d ", le32_to_cpu(path->p_ext->ee_block), ext4_ext_pblock(path->p_ext), - ext4_ext_is_uninitialized(path->p_ext), + ext4_ext_is_unwritten(path->p_ext), ext4_ext_get_actual_len(path->p_ext)); #ifdef CHECK_BINSEARCH @@ -1686,11 +1687,11 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, /* * Make sure that both extents are initialized. We don't merge - * uninitialized extents so that we can be sure that end_io code has + * unwritten extents so that we can be sure that end_io code has * the extent that was written properly split out and conversion to * initialized is trivial. */ - if (ext4_ext_is_uninitialized(ex1) != ext4_ext_is_uninitialized(ex2)) + if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2)) return 0; ext1_ee_len = ext4_ext_get_actual_len(ex1); @@ -1707,10 +1708,10 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, */ if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) return 0; - if (ext4_ext_is_uninitialized(ex1) && + if (ext4_ext_is_unwritten(ex1) && (ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) || atomic_read(&EXT4_I(inode)->i_unwritten) || - (ext1_ee_len + ext2_ee_len > EXT_UNINIT_MAX_LEN))) + (ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN))) return 0; #ifdef AGGRESSIVE_TEST if (ext1_ee_len >= 4) @@ -1735,7 +1736,7 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, { struct ext4_extent_header *eh; unsigned int depth, len; - int merge_done = 0, uninit; + int merge_done = 0, unwritten; depth = ext_depth(inode); BUG_ON(path[depth].p_hdr == NULL); @@ -1745,11 +1746,11 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) break; /* merge with next extent! */ - uninit = ext4_ext_is_uninitialized(ex); + unwritten = ext4_ext_is_unwritten(ex); ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(ex + 1)); - if (uninit) - ext4_ext_mark_uninitialized(ex); + if (unwritten) + ext4_ext_mark_unwritten(ex); if (ex + 1 < EXT_LAST_EXTENT(eh)) { len = (EXT_LAST_EXTENT(eh) - ex - 1) @@ -1903,7 +1904,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, struct ext4_ext_path *npath = NULL; int depth, len, err; ext4_lblk_t next; - int mb_flags = 0, uninit; + int mb_flags = 0, unwritten; if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); @@ -1943,21 +1944,21 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, if (ext4_can_extents_be_merged(inode, ex, newext)) { ext_debug("append [%d]%d block to %u:[%d]%d" "(from %llu)\n", - ext4_ext_is_uninitialized(newext), + ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext), le32_to_cpu(ex->ee_block), - ext4_ext_is_uninitialized(ex), + ext4_ext_is_unwritten(ex), ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); err = ext4_ext_get_access(handle, inode, path + depth); if (err) return err; - uninit = ext4_ext_is_uninitialized(ex); + unwritten = ext4_ext_is_unwritten(ex); ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(newext)); - if (uninit) - ext4_ext_mark_uninitialized(ex); + if (unwritten) + ext4_ext_mark_unwritten(ex); eh = path[depth].p_hdr; nearex = ex; goto merge; @@ -1969,10 +1970,10 @@ prepend: ext_debug("prepend %u[%d]%d block to %u:[%d]%d" "(from %llu)\n", le32_to_cpu(newext->ee_block), - ext4_ext_is_uninitialized(newext), + ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext), le32_to_cpu(ex->ee_block), - ext4_ext_is_uninitialized(ex), + ext4_ext_is_unwritten(ex), ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); err = ext4_ext_get_access(handle, inode, @@ -1980,13 +1981,13 @@ prepend: if (err) return err; - uninit = ext4_ext_is_uninitialized(ex); + unwritten = ext4_ext_is_unwritten(ex); ex->ee_block = newext->ee_block; ext4_ext_store_pblock(ex, ext4_ext_pblock(newext)); ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(newext)); - if (uninit) - ext4_ext_mark_uninitialized(ex); + if (unwritten) + ext4_ext_mark_unwritten(ex); eh = path[depth].p_hdr; nearex = ex; goto merge; @@ -2046,7 +2047,7 @@ has_space: ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), - ext4_ext_is_uninitialized(newext), + ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext)); nearex = EXT_FIRST_EXTENT(eh); } else { @@ -2057,7 +2058,7 @@ has_space: "nearest %p\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), - ext4_ext_is_uninitialized(newext), + ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext), nearex); nearex++; @@ -2068,7 +2069,7 @@ has_space: "nearest %p\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), - ext4_ext_is_uninitialized(newext), + ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext), nearex); } @@ -2078,7 +2079,7 @@ has_space: "move %d extents from 0x%p to 0x%p\n", le32_to_cpu(newext->ee_block), ext4_ext_pblock(newext), - ext4_ext_is_uninitialized(newext), + ext4_ext_is_unwritten(newext), ext4_ext_get_actual_len(newext), len, nearex, nearex + 1); memmove(nearex + 1, nearex, @@ -2200,7 +2201,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode, es.es_lblk = le32_to_cpu(ex->ee_block); es.es_len = ext4_ext_get_actual_len(ex); es.es_pblk = ext4_ext_pblock(ex); - if (ext4_ext_is_uninitialized(ex)) + if (ext4_ext_is_unwritten(ex)) flags |= FIEMAP_EXTENT_UNWRITTEN; } @@ -2576,7 +2577,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, unsigned num; ext4_lblk_t ex_ee_block; unsigned short ex_ee_len; - unsigned uninitialized = 0; + unsigned unwritten = 0; struct ext4_extent *ex; ext4_fsblk_t pblk; @@ -2623,13 +2624,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, while (ex >= EXT_FIRST_EXTENT(eh) && ex_ee_block + ex_ee_len > start) { - if (ext4_ext_is_uninitialized(ex)) - uninitialized = 1; + if (ext4_ext_is_unwritten(ex)) + unwritten = 1; else - uninitialized = 0; + unwritten = 0; ext_debug("remove ext %u:[%d]%d\n", ex_ee_block, - uninitialized, ex_ee_len); + unwritten, ex_ee_len); path[depth].p_ext = ex; a = ex_ee_block > start ? ex_ee_block : start; @@ -2701,11 +2702,11 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ex->ee_len = cpu_to_le16(num); /* - * Do not mark uninitialized if all the blocks in the + * Do not mark unwritten if all the blocks in the * extent have been removed. */ - if (uninitialized && num) - ext4_ext_mark_uninitialized(ex); + if (unwritten && num) + ext4_ext_mark_unwritten(ex); /* * If the extent was completely released, * we need to remove it from the leaf @@ -2854,9 +2855,9 @@ again: end < ee_block + ext4_ext_get_actual_len(ex) - 1) { int split_flag = 0; - if (ext4_ext_is_uninitialized(ex)) - split_flag = EXT4_EXT_MARK_UNINIT1 | - EXT4_EXT_MARK_UNINIT2; + if (ext4_ext_is_unwritten(ex)) + split_flag = EXT4_EXT_MARK_UNWRIT1 | + EXT4_EXT_MARK_UNWRIT2; /* * Split the extent in two so that 'end' is the last @@ -3113,7 +3114,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) * @path: the path to the extent * @split: the logical block where the extent is splitted. * @split_flags: indicates if the extent could be zeroout if split fails, and - * the states(init or uninit) of new extents. + * the states(init or unwritten) of new extents. * @flags: flags used to insert new extent to extent tree. * * @@ -3155,10 +3156,10 @@ static int ext4_split_extent_at(handle_t *handle, newblock = split - ee_block + ext4_ext_pblock(ex); BUG_ON(split < ee_block || split >= (ee_block + ee_len)); - BUG_ON(!ext4_ext_is_uninitialized(ex) && + BUG_ON(!ext4_ext_is_unwritten(ex) && split_flag & (EXT4_EXT_MAY_ZEROOUT | - EXT4_EXT_MARK_UNINIT1 | - EXT4_EXT_MARK_UNINIT2)); + EXT4_EXT_MARK_UNWRIT1 | + EXT4_EXT_MARK_UNWRIT2)); err = ext4_ext_get_access(handle, inode, path + depth); if (err) @@ -3170,8 +3171,8 @@ static int ext4_split_extent_at(handle_t *handle, * then we just change the state of the extent, and splitting * is not needed. */ - if (split_flag & EXT4_EXT_MARK_UNINIT2) - ext4_ext_mark_uninitialized(ex); + if (split_flag & EXT4_EXT_MARK_UNWRIT2) + ext4_ext_mark_unwritten(ex); else ext4_ext_mark_initialized(ex); @@ -3185,8 +3186,8 @@ static int ext4_split_extent_at(handle_t *handle, /* case a */ memcpy(&orig_ex, ex, sizeof(orig_ex)); ex->ee_len = cpu_to_le16(split - ee_block); - if (split_flag & EXT4_EXT_MARK_UNINIT1) - ext4_ext_mark_uninitialized(ex); + if (split_flag & EXT4_EXT_MARK_UNWRIT1) + ext4_ext_mark_unwritten(ex); /* * path may lead to new leaf, not to original leaf any more @@ -3200,8 +3201,8 @@ static int ext4_split_extent_at(handle_t *handle, ex2->ee_block = cpu_to_le32(split); ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block)); ext4_ext_store_pblock(ex2, newblock); - if (split_flag & EXT4_EXT_MARK_UNINIT2) - ext4_ext_mark_uninitialized(ex2); + if (split_flag & EXT4_EXT_MARK_UNWRIT2) + ext4_ext_mark_unwritten(ex2); err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { @@ -3278,7 +3279,7 @@ static int ext4_split_extent(handle_t *handle, struct ext4_extent *ex; unsigned int ee_len, depth; int err = 0; - int uninitialized; + int unwritten; int split_flag1, flags1; int allocated = map->m_len; @@ -3286,14 +3287,14 @@ static int ext4_split_extent(handle_t *handle, ex = path[depth].p_ext; ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); - uninitialized = ext4_ext_is_uninitialized(ex); + unwritten = ext4_ext_is_unwritten(ex); if (map->m_lblk + map->m_len < ee_block + ee_len) { split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT; flags1 = flags | EXT4_GET_BLOCKS_PRE_IO; - if (uninitialized) - split_flag1 |= EXT4_EXT_MARK_UNINIT1 | - EXT4_EXT_MARK_UNINIT2; + if (unwritten) + split_flag1 |= EXT4_EXT_MARK_UNWRIT1 | + EXT4_EXT_MARK_UNWRIT2; if (split_flag & EXT4_EXT_DATA_VALID2) split_flag1 |= EXT4_EXT_DATA_VALID1; err = ext4_split_extent_at(handle, inode, path, @@ -3318,15 +3319,15 @@ static int ext4_split_extent(handle_t *handle, (unsigned long) map->m_lblk); return -EIO; } - uninitialized = ext4_ext_is_uninitialized(ex); + unwritten = ext4_ext_is_unwritten(ex); split_flag1 = 0; if (map->m_lblk >= ee_block) { split_flag1 = split_flag & EXT4_EXT_DATA_VALID2; - if (uninitialized) { - split_flag1 |= EXT4_EXT_MARK_UNINIT1; + if (unwritten) { + split_flag1 |= EXT4_EXT_MARK_UNWRIT1; split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT | - EXT4_EXT_MARK_UNINIT2); + EXT4_EXT_MARK_UNWRIT2); } err = ext4_split_extent_at(handle, inode, path, map->m_lblk, split_flag1, flags); @@ -3341,16 +3342,16 @@ out: /* * This function is called by ext4_ext_map_blocks() if someone tries to write - * to an uninitialized extent. It may result in splitting the uninitialized + * to an unwritten extent. It may result in splitting the unwritten * extent into multiple extents (up to three - one initialized and two - * uninitialized). + * unwritten). * There are three possibilities: * a> There is no split required: Entire extent should be initialized * b> Splits in two extents: Write is happening at either end of the extent * c> Splits in three extents: Somone is writing in middle of the extent * * Pre-conditions: - * - The extent pointed to by 'path' is uninitialized. + * - The extent pointed to by 'path' is unwritten. * - The extent pointed to by 'path' contains a superset * of the logical span [map->m_lblk, map->m_lblk + map->m_len). * @@ -3396,12 +3397,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); /* Pre-conditions */ - BUG_ON(!ext4_ext_is_uninitialized(ex)); + BUG_ON(!ext4_ext_is_unwritten(ex)); BUG_ON(!in_range(map->m_lblk, ee_block, ee_len)); /* * Attempt to transfer newly initialized blocks from the currently - * uninitialized extent to its neighbor. This is much cheaper + * unwritten extent to its neighbor. This is much cheaper * than an insertion followed by a merge as those involve costly * memmove() calls. Transferring to the left is the common case in * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE) @@ -3437,7 +3438,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, * - C4: abut_ex can receive the additional blocks without * overflowing the (initialized) length limit. */ - if ((!ext4_ext_is_uninitialized(abut_ex)) && /*C1*/ + if ((!ext4_ext_is_unwritten(abut_ex)) && /*C1*/ ((prev_lblk + prev_len) == ee_block) && /*C2*/ ((prev_pblk + prev_len) == ee_pblk) && /*C3*/ (prev_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/ @@ -3452,7 +3453,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ex->ee_block = cpu_to_le32(ee_block + map_len); ext4_ext_store_pblock(ex, ee_pblk + map_len); ex->ee_len = cpu_to_le16(ee_len - map_len); - ext4_ext_mark_uninitialized(ex); /* Restore the flag */ + ext4_ext_mark_unwritten(ex); /* Restore the flag */ /* Extend abut_ex by 'map_len' blocks */ abut_ex->ee_len = cpu_to_le16(prev_len + map_len); @@ -3483,7 +3484,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, * - C4: abut_ex can receive the additional blocks without * overflowing the (initialized) length limit. */ - if ((!ext4_ext_is_uninitialized(abut_ex)) && /*C1*/ + if ((!ext4_ext_is_unwritten(abut_ex)) && /*C1*/ ((map->m_lblk + map_len) == next_lblk) && /*C2*/ ((ee_pblk + ee_len) == next_pblk) && /*C3*/ (next_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/ @@ -3498,7 +3499,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, abut_ex->ee_block = cpu_to_le32(next_lblk - map_len); ext4_ext_store_pblock(abut_ex, next_pblk - map_len); ex->ee_len = cpu_to_le16(ee_len - map_len); - ext4_ext_mark_uninitialized(ex); /* Restore the flag */ + ext4_ext_mark_unwritten(ex); /* Restore the flag */ /* Extend abut_ex by 'map_len' blocks */ abut_ex->ee_len = cpu_to_le16(next_len + map_len); @@ -3603,26 +3604,26 @@ out: /* * This function is called by ext4_ext_map_blocks() from * ext4_get_blocks_dio_write() when DIO to write - * to an uninitialized extent. + * to an unwritten extent. * - * Writing to an uninitialized extent may result in splitting the uninitialized - * extent into multiple initialized/uninitialized extents (up to three) + * Writing to an unwritten extent may result in splitting the unwritten + * extent into multiple initialized/unwritten extents (up to three) * There are three possibilities: - * a> There is no split required: Entire extent should be uninitialized + * a> There is no split required: Entire extent should be unwritten * b> Splits in two extents: Write is happening at either end of the extent * c> Splits in three extents: Somone is writing in middle of the extent * * This works the same way in the case of initialized -> unwritten conversion. * * One of more index blocks maybe needed if the extent tree grow after - * the uninitialized extent split. To prevent ENOSPC occur at the IO - * complete, we need to split the uninitialized extent before DIO submit - * the IO. The uninitialized extent called at this time will be split - * into three uninitialized extent(at most). After IO complete, the part + * the unwritten extent split. To prevent ENOSPC occur at the IO + * complete, we need to split the unwritten extent before DIO submit + * the IO. The unwritten extent called at this time will be split + * into three unwritten extent(at most). After IO complete, the part * being filled will be convert to initialized by the end_io callback function * via ext4_convert_unwritten_extents(). * - * Returns the size of uninitialized extent to be written on success. + * Returns the size of unwritten extent to be written on success. */ static int ext4_split_convert_extents(handle_t *handle, struct inode *inode, @@ -3660,7 +3661,7 @@ static int ext4_split_convert_extents(handle_t *handle, } else if (flags & EXT4_GET_BLOCKS_CONVERT) { split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; - split_flag |= (EXT4_EXT_MARK_UNINIT2 | EXT4_EXT_DATA_VALID2); + split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2); } flags |= EXT4_GET_BLOCKS_PRE_IO; return ext4_split_extent(handle, inode, path, map, split_flag, flags); @@ -3710,8 +3711,8 @@ static int ext4_convert_initialized_extents(handle_t *handle, err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; - /* first mark the extent as uninitialized */ - ext4_ext_mark_uninitialized(ex); + /* first mark the extent as unwritten */ + ext4_ext_mark_unwritten(ex); /* note: ext4_ext_correct_indexes() isn't needed here because * borders are not changed @@ -3971,10 +3972,10 @@ ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode, /* * Make sure that the extent is no bigger than we support with - * uninitialized extent + * unwritten extent */ - if (map->m_len > EXT_UNINIT_MAX_LEN) - map->m_len = EXT_UNINIT_MAX_LEN / 2; + if (map->m_len > EXT_UNWRITTEN_MAX_LEN) + map->m_len = EXT_UNWRITTEN_MAX_LEN / 2; ret = ext4_convert_initialized_extents(handle, inode, map, path); @@ -3993,7 +3994,7 @@ ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode, } static int -ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, +ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, struct ext4_ext_path *path, int flags, unsigned int allocated, ext4_fsblk_t newblock) @@ -4002,23 +4003,23 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, int err = 0; ext4_io_end_t *io = ext4_inode_aio(inode); - ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " + ext_debug("ext4_ext_handle_unwritten_extents: inode %lu, logical " "block %llu, max_blocks %u, flags %x, allocated %u\n", inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, flags, allocated); ext4_ext_show_leaf(inode, path); /* - * When writing into uninitialized space, we should not fail to + * When writing into unwritten space, we should not fail to * allocate metadata blocks for the new extent block if needed. */ flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL; - trace_ext4_ext_handle_uninitialized_extents(inode, map, flags, + trace_ext4_ext_handle_unwritten_extents(inode, map, flags, allocated, newblock); /* get_block() before submit the IO, split the extent */ - if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { + if (flags & EXT4_GET_BLOCKS_PRE_IO) { ret = ext4_split_convert_extents(handle, inode, map, path, flags | EXT4_GET_BLOCKS_CONVERT); if (ret <= 0) @@ -4033,12 +4034,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, else ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); map->m_flags |= EXT4_MAP_UNWRITTEN; - if (ext4_should_dioread_nolock(inode)) - map->m_flags |= EXT4_MAP_UNINIT; goto out; } /* IO end_io complete, convert the filled extent to written */ - if ((flags & EXT4_GET_BLOCKS_CONVERT)) { + if (flags & EXT4_GET_BLOCKS_CONVERT) { ret = ext4_convert_unwritten_extents_endio(handle, inode, map, path); if (ret >= 0) { @@ -4059,7 +4058,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, * repeat fallocate creation request * we already have an unwritten extent */ - if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) { + if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) { map->m_flags |= EXT4_MAP_UNWRITTEN; goto map_out; } @@ -4310,7 +4309,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, /* - * Uninitialized extents are treated as holes, except that + * unwritten extents are treated as holes, except that * we split out initialized portions during a write. */ ee_len = ext4_ext_get_actual_len(ex); @@ -4329,16 +4328,16 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * If the extent is initialized check whether the * caller wants to convert it to unwritten. */ - if ((!ext4_ext_is_uninitialized(ex)) && + if ((!ext4_ext_is_unwritten(ex)) && (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { allocated = ext4_ext_convert_initialized_extent( handle, inode, map, path, flags, allocated, newblock); goto out2; - } else if (!ext4_ext_is_uninitialized(ex)) + } else if (!ext4_ext_is_unwritten(ex)) goto out; - ret = ext4_ext_handle_uninitialized_extents( + ret = ext4_ext_handle_unwritten_extents( handle, inode, map, path, flags, allocated, newblock); if (ret < 0) @@ -4410,15 +4409,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, /* * See if request is beyond maximum number of blocks we can have in * a single extent. For an initialized extent this limit is - * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is - * EXT_UNINIT_MAX_LEN. + * EXT_INIT_MAX_LEN and for an unwritten extent this limit is + * EXT_UNWRITTEN_MAX_LEN. */ if (map->m_len > EXT_INIT_MAX_LEN && - !(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) + !(flags & EXT4_GET_BLOCKS_UNWRIT_EXT)) map->m_len = EXT_INIT_MAX_LEN; - else if (map->m_len > EXT_UNINIT_MAX_LEN && - (flags & EXT4_GET_BLOCKS_UNINIT_EXT)) - map->m_len = EXT_UNINIT_MAX_LEN; + else if (map->m_len > EXT_UNWRITTEN_MAX_LEN && + (flags & EXT4_GET_BLOCKS_UNWRIT_EXT)) + map->m_len = EXT_UNWRITTEN_MAX_LEN; /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ newex.ee_len = cpu_to_le16(map->m_len); @@ -4466,21 +4465,19 @@ got_allocated_blocks: /* try to insert new extent into found leaf and return */ ext4_ext_store_pblock(&newex, newblock + offset); newex.ee_len = cpu_to_le16(ar.len); - /* Mark uninitialized */ - if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ - ext4_ext_mark_uninitialized(&newex); + /* Mark unwritten */ + if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT){ + ext4_ext_mark_unwritten(&newex); map->m_flags |= EXT4_MAP_UNWRITTEN; /* * io_end structure was created for every IO write to an - * uninitialized extent. To avoid unnecessary conversion, + * unwritten extent. To avoid unnecessary conversion, * here we flag the IO that really needs the conversion. * For non asycn direct IO case, flag the inode state * that we need to perform conversion when IO is done. */ - if ((flags & EXT4_GET_BLOCKS_PRE_IO)) + if (flags & EXT4_GET_BLOCKS_PRE_IO) set_unwritten = 1; - if (ext4_should_dioread_nolock(inode)) - map->m_flags |= EXT4_MAP_UNINIT; } err = 0; @@ -4607,9 +4604,9 @@ got_allocated_blocks: /* * Cache the extent and update transaction to commit on fdatasync only - * when it is _not_ an uninitialized extent. + * when it is _not_ an unwritten extent. */ - if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) + if ((flags & EXT4_GET_BLOCKS_UNWRIT_EXT) == 0) ext4_update_inode_fsync_trans(handle, inode, 1); else ext4_update_inode_fsync_trans(handle, inode, 0); @@ -4683,7 +4680,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, * that it doesn't get unnecessarily split into multiple * extents. */ - if (len <= EXT_UNINIT_MAX_LEN) + if (len <= EXT_UNWRITTEN_MAX_LEN) flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; /* @@ -4744,6 +4741,13 @@ static long ext4_zero_range(struct file *file, loff_t offset, if (!S_ISREG(inode->i_mode)) return -EINVAL; + /* Call ext4_force_commit to flush all data in case of data=journal. */ + if (ext4_should_journal_data(inode)) { + ret = ext4_force_commit(inode->i_sb); + if (ret) + return ret; + } + /* * Write out all dirty pages to avoid race conditions * Then release them. @@ -4775,7 +4779,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, else max_blocks -= lblk; - flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT | + flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN; if (mode & FALLOC_FL_KEEP_SIZE) flags |= EXT4_GET_BLOCKS_KEEP_SIZE; @@ -4918,7 +4922,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - lblk; - flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT; + flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT; if (mode & FALLOC_FL_KEEP_SIZE) flags |= EXT4_GET_BLOCKS_KEEP_SIZE; diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 0ebc21204b51..3f5c188953a4 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -344,8 +344,14 @@ static int ext4_es_can_be_merged(struct extent_status *es1, if (ext4_es_status(es1) != ext4_es_status(es2)) return 0; - if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL) + if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) { + pr_warn("ES assertion failed when merging extents. " + "The sum of lengths of es1 (%d) and es2 (%d) " + "is bigger than allowed file size (%d)\n", + es1->es_len, es2->es_len, EXT_MAX_BLOCKS); + WARN_ON(1); return 0; + } if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk) return 0; @@ -433,7 +439,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, ee_start = ext4_ext_pblock(ex); ee_len = ext4_ext_get_actual_len(ex); - ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0; + ee_status = ext4_ext_is_unwritten(ex) ? 1 : 0; es_status = ext4_es_is_unwritten(es) ? 1 : 0; /* diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 063fc1538355..4e8bc284ec0e 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -57,7 +57,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp) return 0; } -void ext4_unwritten_wait(struct inode *inode) +static void ext4_unwritten_wait(struct inode *inode) { wait_queue_head_t *wq = ext4_ioend_wq(inode); @@ -92,58 +92,91 @@ ext4_unaligned_aio(struct inode *inode, const struct iovec *iov, } static ssize_t -ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ext4_file_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; + struct inode *inode = file_inode(iocb->ki_filp); + struct mutex *aio_mutex = NULL; struct blk_plug plug; - int unaligned_aio = 0; - ssize_t ret; + int o_direct = file->f_flags & O_DIRECT; int overwrite = 0; size_t length = iov_length(iov, nr_segs); + ssize_t ret; - if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && - !is_sync_kiocb(iocb)) - unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos); + BUG_ON(iocb->ki_pos != pos); - /* Unaligned direct AIO must be serialized; see comment above */ - if (unaligned_aio) { - mutex_lock(ext4_aio_mutex(inode)); + /* + * Unaligned direct AIO must be serialized; see comment above + * In the case of O_APPEND, assume that we must always serialize + */ + if (o_direct && + ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && + !is_sync_kiocb(iocb) && + (file->f_flags & O_APPEND || + ext4_unaligned_aio(inode, iov, nr_segs, pos))) { + aio_mutex = ext4_aio_mutex(inode); + mutex_lock(aio_mutex); ext4_unwritten_wait(inode); } - BUG_ON(iocb->ki_pos != pos); - mutex_lock(&inode->i_mutex); - blk_start_plug(&plug); + if (file->f_flags & O_APPEND) + iocb->ki_pos = pos = i_size_read(inode); + + /* + * If we have encountered a bitmap-format file, the size limit + * is smaller than s_maxbytes, which is for extent-mapped files. + */ + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - iocb->private = &overwrite; + if ((pos > sbi->s_bitmap_maxbytes) || + (pos == sbi->s_bitmap_maxbytes && length > 0)) { + mutex_unlock(&inode->i_mutex); + ret = -EFBIG; + goto errout; + } - /* check whether we do a DIO overwrite or not */ - if (ext4_should_dioread_nolock(inode) && !unaligned_aio && - !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) { - struct ext4_map_blocks map; - unsigned int blkbits = inode->i_blkbits; - int err, len; + if (pos + length > sbi->s_bitmap_maxbytes) { + nr_segs = iov_shorten((struct iovec *)iov, nr_segs, + sbi->s_bitmap_maxbytes - pos); + } + } - map.m_lblk = pos >> blkbits; - map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits) - - map.m_lblk; - len = map.m_len; + if (o_direct) { + blk_start_plug(&plug); - err = ext4_map_blocks(NULL, inode, &map, 0); - /* - * 'err==len' means that all of blocks has been preallocated no - * matter they are initialized or not. For excluding - * uninitialized extents, we need to check m_flags. There are - * two conditions that indicate for initialized extents. - * 1) If we hit extent cache, EXT4_MAP_MAPPED flag is returned; - * 2) If we do a real lookup, non-flags are returned. - * So we should check these two conditions. - */ - if (err == len && (map.m_flags & EXT4_MAP_MAPPED)) - overwrite = 1; + iocb->private = &overwrite; + + /* check whether we do a DIO overwrite or not */ + if (ext4_should_dioread_nolock(inode) && !aio_mutex && + !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) { + struct ext4_map_blocks map; + unsigned int blkbits = inode->i_blkbits; + int err, len; + + map.m_lblk = pos >> blkbits; + map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits) + - map.m_lblk; + len = map.m_len; + + err = ext4_map_blocks(NULL, inode, &map, 0); + /* + * 'err==len' means that all of blocks has + * been preallocated no matter they are + * initialized or not. For excluding + * unwritten extents, we need to check + * m_flags. There are two conditions that + * indicate for initialized extents. 1) If we + * hit extent cache, EXT4_MAP_MAPPED flag is + * returned; 2) If we do a real lookup, + * non-flags are returned. So we should check + * these two conditions. + */ + if (err == len && (map.m_flags & EXT4_MAP_MAPPED)) + overwrite = 1; + } } ret = __generic_file_aio_write(iocb, iov, nr_segs); @@ -156,45 +189,12 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, if (err < 0) ret = err; } - blk_finish_plug(&plug); - - if (unaligned_aio) - mutex_unlock(ext4_aio_mutex(inode)); - - return ret; -} - -static ssize_t -ext4_file_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - struct inode *inode = file_inode(iocb->ki_filp); - ssize_t ret; - - /* - * If we have encountered a bitmap-format file, the size limit - * is smaller than s_maxbytes, which is for extent-mapped files. - */ - - if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - size_t length = iov_length(iov, nr_segs); - - if ((pos > sbi->s_bitmap_maxbytes || - (pos == sbi->s_bitmap_maxbytes && length > 0))) - return -EFBIG; - - if (pos + length > sbi->s_bitmap_maxbytes) { - nr_segs = iov_shorten((struct iovec *)iov, nr_segs, - sbi->s_bitmap_maxbytes - pos); - } - } - - if (unlikely(iocb->ki_filp->f_flags & O_DIRECT)) - ret = ext4_file_dio_write(iocb, iov, nr_segs, pos); - else - ret = generic_file_aio_write(iocb, iov, nr_segs, pos); + if (o_direct) + blk_finish_plug(&plug); +errout: + if (aio_mutex) + mutex_unlock(aio_mutex); return ret; } @@ -244,6 +244,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1); if (IS_ERR(handle)) return PTR_ERR(handle); + BUFFER_TRACE(sbi->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) { ext4_journal_stop(handle); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 82edf5b93352..645205d8ada6 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -22,7 +22,7 @@ #define EXT4_INLINE_DOTDOT_OFFSET 2 #define EXT4_INLINE_DOTDOT_SIZE 4 -int ext4_get_inline_size(struct inode *inode) +static int ext4_get_inline_size(struct inode *inode) { if (EXT4_I(inode)->i_inline_off) return EXT4_I(inode)->i_inline_size; @@ -211,8 +211,8 @@ out: * value since it is already handled by ext4_xattr_ibody_inline_set. * That saves us one memcpy. */ -void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc, - void *buffer, loff_t pos, unsigned int len) +static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc, + void *buffer, loff_t pos, unsigned int len) { struct ext4_xattr_entry *entry; struct ext4_xattr_ibody_header *header; @@ -264,6 +264,7 @@ static int ext4_create_inline_data(handle_t *handle, if (error) return error; + BUFFER_TRACE(is.iloc.bh, "get_write_access"); error = ext4_journal_get_write_access(handle, is.iloc.bh); if (error) goto out; @@ -347,6 +348,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode, if (error == -ENODATA) goto out; + BUFFER_TRACE(is.iloc.bh, "get_write_access"); error = ext4_journal_get_write_access(handle, is.iloc.bh); if (error) goto out; @@ -373,8 +375,8 @@ out: return error; } -int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, - unsigned int len) +static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, + unsigned int len) { int ret, size; struct ext4_inode_info *ei = EXT4_I(inode); @@ -424,6 +426,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle, if (error) goto out; + BUFFER_TRACE(is.iloc.bh, "get_write_access"); error = ext4_journal_get_write_access(handle, is.iloc.bh); if (error) goto out; @@ -1007,6 +1010,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle, if (err) return err; + BUFFER_TRACE(iloc->bh, "get_write_access"); err = ext4_journal_get_write_access(handle, iloc->bh); if (err) return err; @@ -1669,6 +1673,7 @@ int ext4_delete_inline_entry(handle_t *handle, EXT4_MIN_INLINE_DATA_SIZE; } + BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, bh); if (err) goto out; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d7b7462a0e13..7fcd68ee9155 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -148,6 +148,9 @@ static int ext4_inode_is_fast_symlink(struct inode *inode) int ea_blocks = EXT4_I(inode)->i_file_acl ? EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0; + if (ext4_has_inline_data(inode)) + return 0; + return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); } @@ -443,7 +446,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, * could be converted. */ if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) - down_read((&EXT4_I(inode)->i_data_sem)); + down_read(&EXT4_I(inode)->i_data_sem); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { retval = ext4_ext_map_blocks(handle, inode, map, flags & EXT4_GET_BLOCKS_KEEP_SIZE); @@ -489,8 +492,8 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping * based files * - * On success, it returns the number of blocks being mapped or allocate. - * if create==0 and the blocks are pre-allocated and uninitialized block, + * On success, it returns the number of blocks being mapped or allocated. + * if create==0 and the blocks are pre-allocated and unwritten block, * the result buffer head is unmapped. If the create ==1, it will make sure * the buffer head is mapped. * @@ -555,7 +558,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, * file system block. */ if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) - down_read((&EXT4_I(inode)->i_data_sem)); + down_read(&EXT4_I(inode)->i_data_sem); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { retval = ext4_ext_map_blocks(handle, inode, map, flags & EXT4_GET_BLOCKS_KEEP_SIZE); @@ -622,12 +625,12 @@ found: map->m_flags &= ~EXT4_MAP_FLAGS; /* - * New blocks allocate and/or writing to uninitialized extent + * New blocks allocate and/or writing to unwritten extent * will possibly result in updating i_data, so we take * the write lock of i_data_sem, and call get_blocks() * with create == 1 flag. */ - down_write((&EXT4_I(inode)->i_data_sem)); + down_write(&EXT4_I(inode)->i_data_sem); /* * if the caller is from delayed allocation writeout path @@ -922,6 +925,7 @@ int do_journal_get_write_access(handle_t *handle, */ if (dirty) clear_buffer_dirty(bh); + BUFFER_TRACE(bh, "get write access"); ret = ext4_journal_get_write_access(handle, bh); if (!ret && dirty) ret = ext4_handle_dirty_metadata(handle, NULL, bh); @@ -1540,7 +1544,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, ext4_es_lru_add(inode); if (ext4_es_is_hole(&es)) { retval = 0; - down_read((&EXT4_I(inode)->i_data_sem)); + down_read(&EXT4_I(inode)->i_data_sem); goto add_delayed; } @@ -1577,7 +1581,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, * Try to see if we can get the block without requesting a new * file system block. */ - down_read((&EXT4_I(inode)->i_data_sem)); + down_read(&EXT4_I(inode)->i_data_sem); if (ext4_has_inline_data(inode)) { /* * We will soon create blocks for this page, and let @@ -1769,6 +1773,7 @@ static int __ext4_journalled_writepage(struct page *page, BUG_ON(!ext4_handle_valid(handle)); if (inline_data) { + BUFFER_TRACE(inode_bh, "get write access"); ret = ext4_journal_get_write_access(handle, inode_bh); err = ext4_handle_dirty_metadata(handle, inode, inode_bh); @@ -1846,6 +1851,7 @@ static int ext4_writepage(struct page *page, struct buffer_head *page_bufs = NULL; struct inode *inode = page->mapping->host; struct ext4_io_submit io_submit; + bool keep_towrite = false; trace_ext4_writepage(page); size = i_size_read(inode); @@ -1876,6 +1882,7 @@ static int ext4_writepage(struct page *page, unlock_page(page); return 0; } + keep_towrite = true; } if (PageChecked(page) && ext4_should_journal_data(inode)) @@ -1892,7 +1899,7 @@ static int ext4_writepage(struct page *page, unlock_page(page); return -ENOMEM; } - ret = ext4_bio_write_page(&io_submit, page, len, wbc); + ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite); ext4_io_submit(&io_submit); /* Drop io_end reference we got from init */ ext4_put_io_end_defer(io_submit.io_end); @@ -1911,7 +1918,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) else len = PAGE_CACHE_SIZE; clear_page_dirty_for_io(page); - err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc); + err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false); if (!err) mpd->wbc->nr_to_write--; mpd->first_page++; @@ -2032,7 +2039,7 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd, * Scan buffers corresponding to changed extent (we expect corresponding pages * to be already locked) and update buffer state according to new extent state. * We map delalloc buffers to their physical location, clear unwritten bits, - * and mark buffers as uninit when we perform writes to uninitialized extents + * and mark buffers as uninit when we perform writes to unwritten extents * and do extent conversion after IO is finished. If the last page is not fully * mapped, we update @map to the next extent in the last page that needs * mapping. Otherwise we submit the page for IO. @@ -2126,12 +2133,12 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) struct inode *inode = mpd->inode; struct ext4_map_blocks *map = &mpd->map; int get_blocks_flags; - int err; + int err, dioread_nolock; trace_ext4_da_write_pages_extent(inode, map); /* * Call ext4_map_blocks() to allocate any delayed allocation blocks, or - * to convert an uninitialized extent to be initialized (in the case + * to convert an unwritten extent to be initialized (in the case * where we have written into one or more preallocated blocks). It is * possible that we're going to need more metadata blocks than * previously reserved. However we must not fail because we're in @@ -2148,7 +2155,8 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) */ get_blocks_flags = EXT4_GET_BLOCKS_CREATE | EXT4_GET_BLOCKS_METADATA_NOFAIL; - if (ext4_should_dioread_nolock(inode)) + dioread_nolock = ext4_should_dioread_nolock(inode); + if (dioread_nolock) get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; if (map->m_flags & (1 << BH_Delay)) get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; @@ -2156,7 +2164,7 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) err = ext4_map_blocks(handle, inode, map, get_blocks_flags); if (err < 0) return err; - if (map->m_flags & EXT4_MAP_UNINIT) { + if (dioread_nolock && (map->m_flags & EXT4_MAP_UNWRITTEN)) { if (!mpd->io_submit.io_end->handle && ext4_handle_valid(handle)) { mpd->io_submit.io_end->handle = handle->h_rsv_handle; @@ -3070,9 +3078,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, * preallocated extents, and those write extend the file, no need to * fall back to buffered IO. * - * For holes, we fallocate those blocks, mark them as uninitialized + * For holes, we fallocate those blocks, mark them as unwritten * If those blocks were preallocated, we mark sure they are split, but - * still keep the range to write as uninitialized. + * still keep the range to write as unwritten. * * The unwritten extents will be converted to written when DIO is completed. * For async direct IO, since the IO may still pending when return, we @@ -3124,12 +3132,12 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, * We could direct write to holes and fallocate. * * Allocated blocks to fill the hole are marked as - * uninitialized to prevent parallel buffered read to expose + * unwritten to prevent parallel buffered read to expose * the stale data before DIO complete the data IO. * * As to previously fallocated extents, ext4 get_block will * just simply mark the buffer mapped but still keep the - * extents uninitialized. + * extents unwritten. * * For non AIO case, we will convert those unwritten extents * to written after return back from blockdev_direct_IO. @@ -3440,7 +3448,7 @@ unlock: * This required during truncate. We need to physically zero the tail end * of that block so it doesn't yield old data if the file is later grown. */ -int ext4_block_truncate_page(handle_t *handle, +static int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from) { unsigned offset = from & (PAGE_CACHE_SIZE-1); @@ -4304,12 +4312,15 @@ static int ext4_do_update_inode(handle_t *handle, struct ext4_inode *raw_inode = ext4_raw_inode(iloc); struct ext4_inode_info *ei = EXT4_I(inode); struct buffer_head *bh = iloc->bh; + struct super_block *sb = inode->i_sb; int err = 0, rc, block; - int need_datasync = 0; + int need_datasync = 0, set_large_file = 0; uid_t i_uid; gid_t i_gid; - /* For fields not not tracking in the in-memory inode, + spin_lock(&ei->i_raw_lock); + + /* For fields not tracked in the in-memory inode, * initialise them to zero for new inodes. */ if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); @@ -4347,8 +4358,10 @@ static int ext4_do_update_inode(handle_t *handle, EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); - if (ext4_inode_blocks_set(handle, raw_inode, ei)) + if (ext4_inode_blocks_set(handle, raw_inode, ei)) { + spin_unlock(&ei->i_raw_lock); goto out_brelse; + } raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) @@ -4360,24 +4373,11 @@ static int ext4_do_update_inode(handle_t *handle, need_datasync = 1; } if (ei->i_disksize > 0x7fffffffULL) { - struct super_block *sb = inode->i_sb; if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_LARGE_FILE) || EXT4_SB(sb)->s_es->s_rev_level == - cpu_to_le32(EXT4_GOOD_OLD_REV)) { - /* If this is the first large file - * created, add a flag to the superblock. - */ - err = ext4_journal_get_write_access(handle, - EXT4_SB(sb)->s_sbh); - if (err) - goto out_brelse; - ext4_update_dynamic_rev(sb); - EXT4_SET_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_LARGE_FILE); - ext4_handle_sync(handle); - err = ext4_handle_dirty_super(handle, sb); - } + cpu_to_le32(EXT4_GOOD_OLD_REV)) + set_large_file = 1; } raw_inode->i_generation = cpu_to_le32(inode->i_generation); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { @@ -4409,12 +4409,24 @@ static int ext4_do_update_inode(handle_t *handle, ext4_inode_csum_set(inode, raw_inode, ei); + spin_unlock(&ei->i_raw_lock); + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); rc = ext4_handle_dirty_metadata(handle, NULL, bh); if (!err) err = rc; ext4_clear_inode_state(inode, EXT4_STATE_NEW); - + if (set_large_file) { + BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access"); + err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); + if (err) + goto out_brelse; + ext4_update_dynamic_rev(sb); + EXT4_SET_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_LARGE_FILE); + ext4_handle_sync(handle); + err = ext4_handle_dirty_super(handle, sb); + } ext4_update_inode_fsync_trans(handle, inode, need_datasync); out_brelse: brelse(bh); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c8238a26818c..59e31622cc6e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1044,6 +1044,8 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) * allocating. If we are looking at the buddy cache we would * have taken a reference using ext4_mb_load_buddy and that * would have pinned buddy page to page cache. + * The call to ext4_mb_get_buddy_page_lock will mark the + * page accessed. */ ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b); if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) { @@ -1062,7 +1064,6 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) ret = -EIO; goto err; } - mark_page_accessed(page); if (e4b.bd_buddy_page == NULL) { /* @@ -1082,7 +1083,6 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) ret = -EIO; goto err; } - mark_page_accessed(page); err: ext4_mb_put_buddy_page_lock(&e4b); return ret; @@ -1141,7 +1141,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, /* we could use find_or_create_page(), but it locks page * what we'd like to avoid in fast path ... */ - page = find_get_page(inode->i_mapping, pnum); + page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED); if (page == NULL || !PageUptodate(page)) { if (page) /* @@ -1176,15 +1176,16 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, ret = -EIO; goto err; } + + /* Pages marked accessed already */ e4b->bd_bitmap_page = page; e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); - mark_page_accessed(page); block++; pnum = block / blocks_per_page; poff = block % blocks_per_page; - page = find_get_page(inode->i_mapping, pnum); + page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED); if (page == NULL || !PageUptodate(page)) { if (page) page_cache_release(page); @@ -1209,9 +1210,10 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, ret = -EIO; goto err; } + + /* Pages marked accessed already */ e4b->bd_buddy_page = page; e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); - mark_page_accessed(page); BUG_ON(e4b->bd_bitmap_page == NULL); BUG_ON(e4b->bd_buddy_page == NULL); @@ -2617,7 +2619,7 @@ int ext4_mb_init(struct super_block *sb) sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); if (sbi->s_locality_groups == NULL) { ret = -ENOMEM; - goto out_free_groupinfo_slab; + goto out; } for_each_possible_cpu(i) { struct ext4_locality_group *lg; @@ -2642,8 +2644,6 @@ int ext4_mb_init(struct super_block *sb) out_free_locality_groups: free_percpu(sbi->s_locality_groups); sbi->s_locality_groups = NULL; -out_free_groupinfo_slab: - ext4_groupinfo_destroy_slabs(); out: kfree(sbi->s_mb_offsets); sbi->s_mb_offsets = NULL; @@ -2876,6 +2876,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, if (!bitmap_bh) goto out_err; + BUFFER_TRACE(bitmap_bh, "getting write access"); err = ext4_journal_get_write_access(handle, bitmap_bh); if (err) goto out_err; @@ -2888,6 +2889,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group, ext4_free_group_clusters(sb, gdp)); + BUFFER_TRACE(gdp_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdp_bh); if (err) goto out_err; @@ -3145,7 +3147,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, } BUG_ON(start + size <= ac->ac_o_ex.fe_logical && start > ac->ac_o_ex.fe_logical); - BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb)); + BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); /* now prepare goal request */ diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 2ae73a80c19b..ec092437d3e0 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -505,7 +505,7 @@ int ext4_ext_migrate(struct inode *inode) * with i_data_sem held to prevent racing with block * allocation. */ - down_read((&EXT4_I(inode)->i_data_sem)); + down_read(&EXT4_I(inode)->i_data_sem); ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE); up_read((&EXT4_I(inode)->i_data_sem)); diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 04434ad3e8e0..32bce844c2e1 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -18,7 +18,7 @@ static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp) return cpu_to_le32(csum); } -int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) +static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) { if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) @@ -27,7 +27,7 @@ int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp); } -void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) +static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) { if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 58ee7dc87669..2484c7ec6a72 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -57,8 +57,8 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock, static void copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest) { - if (ext4_ext_is_uninitialized(src)) - ext4_ext_mark_uninitialized(dest); + if (ext4_ext_is_unwritten(src)) + ext4_ext_mark_unwritten(dest); else dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest)); } @@ -391,6 +391,7 @@ mext_insert_extents(handle_t *handle, struct inode *orig_inode, if (depth) { /* Register to journal */ + BUFFER_TRACE(orig_path->p_bh, "get_write_access"); ret = ext4_journal_get_write_access(handle, orig_path->p_bh); if (ret) return ret; @@ -593,14 +594,14 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, * @inode: inode in question * @from: block offset of inode * @count: block count to be checked - * @uninit: extents expected to be uninitialized + * @unwritten: extents expected to be unwritten * @err: pointer to save error value * * Return 1 if all extents in range has expected type, and zero otherwise. */ static int mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, - int uninit, int *err) + int unwritten, int *err) { struct ext4_ext_path *path = NULL; struct ext4_extent *ext; @@ -611,7 +612,7 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, if (*err) goto out; ext = path[ext_depth(inode)].p_ext; - if (uninit != ext4_ext_is_uninitialized(ext)) + if (unwritten != ext4_ext_is_unwritten(ext)) goto out; from += ext4_ext_get_actual_len(ext); ext4_ext_drop_refs(path); @@ -894,7 +895,7 @@ out: * @orig_page_offset: page index on original file * @data_offset_in_page: block index where data swapping starts * @block_len_in_page: the number of blocks to be swapped - * @uninit: orig extent is uninitialized or not + * @unwritten: orig extent is unwritten or not * @err: pointer to save return value * * Save the data in original inode blocks and replace original inode extents @@ -905,7 +906,7 @@ out: static int move_extent_per_page(struct file *o_filp, struct inode *donor_inode, pgoff_t orig_page_offset, int data_offset_in_page, - int block_len_in_page, int uninit, int *err) + int block_len_in_page, int unwritten, int *err) { struct inode *orig_inode = file_inode(o_filp); struct page *pagep[2] = {NULL, NULL}; @@ -962,27 +963,27 @@ again: if (unlikely(*err < 0)) goto stop_journal; /* - * If orig extent was uninitialized it can become initialized + * If orig extent was unwritten it can become initialized * at any time after i_data_sem was dropped, in order to * serialize with delalloc we have recheck extent while we * hold page's lock, if it is still the case data copy is not * necessary, just swap data blocks between orig and donor. */ - if (uninit) { + if (unwritten) { ext4_double_down_write_data_sem(orig_inode, donor_inode); /* If any of extents in range became initialized we have to * fallback to data copying */ - uninit = mext_check_coverage(orig_inode, orig_blk_offset, - block_len_in_page, 1, err); + unwritten = mext_check_coverage(orig_inode, orig_blk_offset, + block_len_in_page, 1, err); if (*err) goto drop_data_sem; - uninit &= mext_check_coverage(donor_inode, orig_blk_offset, - block_len_in_page, 1, err); + unwritten &= mext_check_coverage(donor_inode, orig_blk_offset, + block_len_in_page, 1, err); if (*err) goto drop_data_sem; - if (!uninit) { + if (!unwritten) { ext4_double_up_write_data_sem(orig_inode, donor_inode); goto data_copy; } @@ -1259,7 +1260,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; int data_offset_in_page; int block_len_in_page; - int uninit; + int unwritten; if (orig_inode->i_sb != donor_inode->i_sb) { ext4_debug("ext4 move extent: The argument files " @@ -1391,8 +1392,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, !last_extent) continue; - /* Is original extent is uninitialized */ - uninit = ext4_ext_is_uninitialized(ext_prev); + /* Is original extent is unwritten */ + unwritten = ext4_ext_is_unwritten(ext_prev); data_offset_in_page = seq_start % blocks_per_page; @@ -1432,8 +1433,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, o_filp, donor_inode, orig_page_offset, data_offset_in_page, - block_len_in_page, uninit, - &ret); + block_len_in_page, + unwritten, &ret); /* Count how many blocks we have exchanged */ *moved_len += block_len_in_page; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 1cb84f78909e..3520ab8a6639 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -67,6 +67,7 @@ static struct buffer_head *ext4_append(handle_t *handle, return ERR_PTR(err); inode->i_size += inode->i_sb->s_blocksize; EXT4_I(inode)->i_disksize = inode->i_size; + BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, bh); if (err) { brelse(bh); @@ -1778,6 +1779,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, blocksize = dir->i_sb->s_blocksize; dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino)); + BUFFER_TRACE(bh, "get_write_access"); retval = ext4_journal_get_write_access(handle, bh); if (retval) { ext4_std_error(dir->i_sb, retval); @@ -2510,8 +2512,7 @@ static int empty_dir(struct inode *inode) ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize); de = ext4_next_entry(de1, sb->s_blocksize); while (offset < inode->i_size) { - if (!bh || - (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { + if ((void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { unsigned int lblock; err = 0; brelse(bh); @@ -2539,26 +2540,37 @@ static int empty_dir(struct inode *inode) return 1; } -/* ext4_orphan_add() links an unlinked or truncated inode into a list of +/* + * ext4_orphan_add() links an unlinked or truncated inode into a list of * such inodes, starting at the superblock, in case we crash before the * file is closed/deleted, or in case the inode truncate spans multiple * transactions and the last transaction is not recovered after a crash. * * At filesystem recovery time, we walk this list deleting unlinked * inodes and truncating linked inodes in ext4_orphan_cleanup(). + * + * Orphan list manipulation functions must be called under i_mutex unless + * we are just creating the inode or deleting it. */ int ext4_orphan_add(handle_t *handle, struct inode *inode) { struct super_block *sb = inode->i_sb; + struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_iloc iloc; int err = 0, rc; + bool dirty = false; - if (!EXT4_SB(sb)->s_journal) + if (!sbi->s_journal) return 0; - mutex_lock(&EXT4_SB(sb)->s_orphan_lock); + WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && + !mutex_is_locked(&inode->i_mutex)); + /* + * Exit early if inode already is on orphan list. This is a big speedup + * since we don't have to contend on the global s_orphan_lock. + */ if (!list_empty(&EXT4_I(inode)->i_orphan)) - goto out_unlock; + return 0; /* * Orphan handling is only valid for files with data blocks @@ -2569,48 +2581,51 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); - BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); - err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); + BUFFER_TRACE(sbi->s_sbh, "get_write_access"); + err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) - goto out_unlock; + goto out; err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) - goto out_unlock; + goto out; + + mutex_lock(&sbi->s_orphan_lock); /* * Due to previous errors inode may be already a part of on-disk * orphan list. If so skip on-disk list modification. */ - if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <= - (le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) - goto mem_insert; - - /* Insert this inode at the head of the on-disk orphan list... */ - NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); - EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); - err = ext4_handle_dirty_super(handle, sb); - rc = ext4_mark_iloc_dirty(handle, inode, &iloc); - if (!err) - err = rc; - - /* Only add to the head of the in-memory list if all the - * previous operations succeeded. If the orphan_add is going to - * fail (possibly taking the journal offline), we can't risk - * leaving the inode on the orphan list: stray orphan-list - * entries can cause panics at unmount time. - * - * This is safe: on error we're going to ignore the orphan list - * anyway on the next recovery. */ -mem_insert: - if (!err) - list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); - + if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) > + (le32_to_cpu(sbi->s_es->s_inodes_count))) { + /* Insert this inode at the head of the on-disk orphan list */ + NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan); + sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); + dirty = true; + } + list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan); + mutex_unlock(&sbi->s_orphan_lock); + + if (dirty) { + err = ext4_handle_dirty_super(handle, sb); + rc = ext4_mark_iloc_dirty(handle, inode, &iloc); + if (!err) + err = rc; + if (err) { + /* + * We have to remove inode from in-memory list if + * addition to on disk orphan list failed. Stray orphan + * list entries can cause panics at unmount time. + */ + mutex_lock(&sbi->s_orphan_lock); + list_del(&EXT4_I(inode)->i_orphan); + mutex_unlock(&sbi->s_orphan_lock); + } + } jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); jbd_debug(4, "orphan inode %lu will point to %d\n", inode->i_ino, NEXT_ORPHAN(inode)); -out_unlock: - mutex_unlock(&EXT4_SB(sb)->s_orphan_lock); - ext4_std_error(inode->i_sb, err); +out: + ext4_std_error(sb, err); return err; } @@ -2622,45 +2637,51 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) { struct list_head *prev; struct ext4_inode_info *ei = EXT4_I(inode); - struct ext4_sb_info *sbi; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); __u32 ino_next; struct ext4_iloc iloc; int err = 0; - if ((!EXT4_SB(inode->i_sb)->s_journal) && - !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) + if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS)) return 0; - mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); + WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && + !mutex_is_locked(&inode->i_mutex)); + /* Do this quick check before taking global s_orphan_lock. */ if (list_empty(&ei->i_orphan)) - goto out; + return 0; - ino_next = NEXT_ORPHAN(inode); - prev = ei->i_orphan.prev; - sbi = EXT4_SB(inode->i_sb); + if (handle) { + /* Grab inode buffer early before taking global s_orphan_lock */ + err = ext4_reserve_inode_write(handle, inode, &iloc); + } + mutex_lock(&sbi->s_orphan_lock); jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); + prev = ei->i_orphan.prev; list_del_init(&ei->i_orphan); /* If we're on an error path, we may not have a valid * transaction handle with which to update the orphan list on * disk, but we still need to remove the inode from the linked * list in memory. */ - if (!handle) - goto out; - - err = ext4_reserve_inode_write(handle, inode, &iloc); - if (err) + if (!handle || err) { + mutex_unlock(&sbi->s_orphan_lock); goto out_err; + } + ino_next = NEXT_ORPHAN(inode); if (prev == &sbi->s_orphan) { jbd_debug(4, "superblock will point to %u\n", ino_next); BUFFER_TRACE(sbi->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, sbi->s_sbh); - if (err) + if (err) { + mutex_unlock(&sbi->s_orphan_lock); goto out_brelse; + } sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); + mutex_unlock(&sbi->s_orphan_lock); err = ext4_handle_dirty_super(handle, inode->i_sb); } else { struct ext4_iloc iloc2; @@ -2670,20 +2691,20 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) jbd_debug(4, "orphan inode %lu will point to %u\n", i_prev->i_ino, ino_next); err = ext4_reserve_inode_write(handle, i_prev, &iloc2); - if (err) + if (err) { + mutex_unlock(&sbi->s_orphan_lock); goto out_brelse; + } NEXT_ORPHAN(i_prev) = ino_next; err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2); + mutex_unlock(&sbi->s_orphan_lock); } if (err) goto out_brelse; NEXT_ORPHAN(inode) = 0; err = ext4_mark_iloc_dirty(handle, inode, &iloc); - out_err: ext4_std_error(inode->i_sb, err); -out: - mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock); return err; out_brelse: diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index c18d95b50540..b24a2541a9ba 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -401,7 +401,8 @@ submit_and_retry: int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, int len, - struct writeback_control *wbc) + struct writeback_control *wbc, + bool keep_towrite) { struct inode *inode = page->mapping->host; unsigned block_start, blocksize; @@ -414,10 +415,24 @@ int ext4_bio_write_page(struct ext4_io_submit *io, BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); - set_page_writeback(page); + if (keep_towrite) + set_page_writeback_keepwrite(page); + else + set_page_writeback(page); ClearPageError(page); /* + * Comments copied from block_write_full_page: + * + * The page straddles i_size. It must be zeroed out on each and every + * writepage invocation because it may be mmapped. "A file is mapped + * in multiples of the page size. For a file that is not a multiple of + * the page size, the remaining memory is zeroed when mapped, and + * writes to that region are not written out to the file." + */ + if (len < PAGE_CACHE_SIZE) + zero_user_segment(page, len, PAGE_CACHE_SIZE); + /* * In the first loop we prepare and mark buffers to submit. We have to * mark all buffers in the page before submitting so that * end_page_writeback() cannot be called from ext4_bio_end_io() when IO @@ -428,19 +443,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io, do { block_start = bh_offset(bh); if (block_start >= len) { - /* - * Comments copied from block_write_full_page_endio: - * - * The page straddles i_size. It must be zeroed out on - * each and every writepage invocation because it may - * be mmapped. "A file is mapped in multiples of the - * page size. For a file that is not a multiple of - * the page size, the remaining memory is zeroed when - * mapped, and writes to that region are not written - * out to the file." - */ - zero_user_segment(page, block_start, - block_start + blocksize); clear_buffer_dirty(bh); set_buffer_uptodate(bh); continue; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index f3b84cd9de56..bb0e80f03e2e 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -42,7 +42,7 @@ int ext4_resize_begin(struct super_block *sb) void ext4_resize_end(struct super_block *sb) { clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); } static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb, @@ -348,6 +348,7 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, bh = sb_getblk(sb, blk); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); + BUFFER_TRACE(bh, "get_write_access"); if ((err = ext4_journal_get_write_access(handle, bh))) { brelse(bh); bh = ERR_PTR(err); @@ -426,6 +427,7 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, if (unlikely(!bh)) return -ENOMEM; + BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, bh); if (err) return err; @@ -518,6 +520,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb, goto out; } + BUFFER_TRACE(gdb, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb); if (err) { brelse(gdb); @@ -790,14 +793,17 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, goto exit_dind; } + BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); if (unlikely(err)) goto exit_dind; + BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb_bh); if (unlikely(err)) goto exit_dind; + BUFFER_TRACE(dind, "get_write_access"); err = ext4_journal_get_write_access(handle, dind); if (unlikely(err)) ext4_std_error(sb, err); @@ -902,6 +908,7 @@ static int add_new_gdb_meta_bg(struct super_block *sb, EXT4_SB(sb)->s_group_desc = n_group_desc; EXT4_SB(sb)->s_gdb_count++; ext4_kvfree(o_group_desc); + BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb_bh); if (unlikely(err)) brelse(gdb_bh); @@ -977,6 +984,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, } for (i = 0; i < reserved_gdb; i++) { + BUFFER_TRACE(primary[i], "get_write_access"); if ((err = ext4_journal_get_write_access(handle, primary[i]))) goto exit_bh; } @@ -1084,6 +1092,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data, ext4_debug("update metadata backup %llu(+%llu)\n", backup_block, backup_block - ext4_group_first_block_no(sb, group)); + BUFFER_TRACE(bh, "get_write_access"); if ((err = ext4_journal_get_write_access(handle, bh))) break; lock_buffer(bh); @@ -1163,6 +1172,7 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, */ if (gdb_off) { gdb_bh = sbi->s_group_desc[gdb_num]; + BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb_bh); if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) @@ -1433,6 +1443,7 @@ static int ext4_flex_group_add(struct super_block *sb, goto exit; } + BUFFER_TRACE(sbi->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) goto exit_journal; @@ -1645,6 +1656,7 @@ static int ext4_group_extend_no_check(struct super_block *sb, return err; } + BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); if (err) { ext4_warning(sb, "error %d on journal write access", err); @@ -1804,6 +1816,7 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) if (IS_ERR(handle)) return PTR_ERR(handle); + BUFFER_TRACE(sbi->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) goto errout; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6f9e6fadac04..b9b9aabfb4d2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -138,8 +138,8 @@ static __le32 ext4_superblock_csum(struct super_block *sb, return cpu_to_le32(csum); } -int ext4_superblock_csum_verify(struct super_block *sb, - struct ext4_super_block *es) +static int ext4_superblock_csum_verify(struct super_block *sb, + struct ext4_super_block *es) { if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) @@ -879,6 +879,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) return NULL; ei->vfs_inode.i_version = 1; + spin_lock_init(&ei->i_raw_lock); INIT_LIST_HEAD(&ei->i_prealloc_list); spin_lock_init(&ei->i_prealloc_lock); ext4_es_init_tree(&ei->i_es_tree); @@ -1903,7 +1904,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, if (!(sbi->s_mount_state & EXT4_VALID_FS)) ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " "running e2fsck is recommended"); - else if ((sbi->s_mount_state & EXT4_ERROR_FS)) + else if (sbi->s_mount_state & EXT4_ERROR_FS) ext4_msg(sb, KERN_WARNING, "warning: mounting fs with errors, " "running e2fsck is recommended"); @@ -2404,6 +2405,16 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb, if (ext4_bg_has_super(sb, bg)) has_super = 1; + /* + * If we have a meta_bg fs with 1k blocks, group 0's GDT is at + * block 2, not 1. If s_first_data_block == 0 (bigalloc is enabled + * on modern mke2fs or blksize > 1k on older mke2fs) then we must + * compensate. + */ + if (sb->s_blocksize == 1024 && nr == 0 && + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0) + has_super++; + return (has_super + ext4_group_first_block_no(sb, bg)); } @@ -3337,7 +3348,7 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb) * By default we reserve 2% or 4096 clusters, whichever is smaller. * This should cover the situations where we can not afford to run * out of space like for example punch hole, or converting - * uninitialized extents in delalloc path. In most cases such + * unwritten extents in delalloc path. In most cases such * allocation would require 1, or 2 blocks, higher numbers are * very rare. */ @@ -5370,6 +5381,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, bh = ext4_bread(handle, inode, blk, 1, &err); if (!bh) goto out; + BUFFER_TRACE(bh, "get write access"); err = ext4_journal_get_write_access(handle, bh); if (err) { brelse(bh); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 4eec399ec807..e7387337060c 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -369,6 +369,9 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name, { int error; + if (strlen(name) > 255) + return -ERANGE; + down_read(&EXT4_I(inode)->xattr_sem); error = ext4_xattr_ibody_get(inode, name_index, name, buffer, buffer_size); @@ -513,6 +516,7 @@ static void ext4_xattr_update_super_block(handle_t *handle, if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR)) return; + BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR); ext4_handle_dirty_super(handle, sb); @@ -532,6 +536,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr); + BUFFER_TRACE(bh, "get_write_access"); error = ext4_journal_get_write_access(handle, bh); if (error) goto out; @@ -774,6 +779,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, if (s->base) { ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev, bs->bh->b_blocknr); + BUFFER_TRACE(bs->bh, "get_write_access"); error = ext4_journal_get_write_access(handle, bs->bh); if (error) goto cleanup; @@ -859,6 +865,7 @@ inserted: EXT4_C2B(EXT4_SB(sb), 1)); if (error) goto cleanup; + BUFFER_TRACE(new_bh, "get_write_access"); error = ext4_journal_get_write_access(handle, new_bh); if (error) @@ -896,7 +903,7 @@ inserted: * take i_data_sem because we will test * i_delalloc_reserved_flag in ext4_mb_new_blocks */ - down_read((&EXT4_I(inode)->i_data_sem)); + down_read(&EXT4_I(inode)->i_data_sem); block = ext4_new_meta_blocks(handle, inode, goal, 0, NULL, &error); up_read((&EXT4_I(inode)->i_data_sem)); diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index e93e4ec7d165..dbe2141d10ad 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -240,7 +240,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, } } - error = f2fs_setxattr(inode, name_index, "", value, size, ipage); + error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0); kfree(value); if (!error) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 4aa521aa9bc3..0b4710c1d370 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -33,12 +33,12 @@ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) struct address_space *mapping = META_MAPPING(sbi); struct page *page = NULL; repeat: - page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); + page = grab_cache_page(mapping, index); if (!page) { cond_resched(); goto repeat; } - + f2fs_wait_on_page_writeback(page, META); SetPageUptodate(page); return page; } @@ -69,11 +69,10 @@ repeat: goto repeat; } out: - mark_page_accessed(page); return page; } -inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type) +static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type) { switch (type) { case META_NAT: @@ -137,13 +136,11 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type) if (!page) continue; if (PageUptodate(page)) { - mark_page_accessed(page); f2fs_put_page(page, 1); continue; } f2fs_submit_page_mbio(sbi, page, blk_addr, &fio); - mark_page_accessed(page); f2fs_put_page(page, 0); } out: @@ -157,6 +154,8 @@ static int f2fs_write_meta_page(struct page *page, struct inode *inode = page->mapping->host; struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + trace_f2fs_writepage(page, META); + if (unlikely(sbi->por_doing)) goto redirty_out; if (wbc->for_reclaim) @@ -174,10 +173,7 @@ no_write: return 0; redirty_out: - dec_page_count(sbi, F2FS_DIRTY_META); - wbc->pages_skipped++; - account_page_redirty(page); - set_page_dirty(page); + redirty_page_for_writepage(wbc, page); return AOP_WRITEPAGE_ACTIVATE; } @@ -187,6 +183,8 @@ static int f2fs_write_meta_pages(struct address_space *mapping, struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); long diff, written; + trace_f2fs_writepages(mapping->host, wbc, META); + /* collect a number of dirty meta pages and write together */ if (wbc->for_kupdate || get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META)) @@ -370,7 +368,9 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi) return; sbi->por_doing = true; - start_blk = __start_cp_addr(sbi) + 1; + + start_blk = __start_cp_addr(sbi) + 1 + + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); orphan_blkaddr = __start_sum_addr(sbi) - 1; ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP); @@ -511,8 +511,11 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) unsigned long blk_size = sbi->blocksize; unsigned long long cp1_version = 0, cp2_version = 0; unsigned long long cp_start_blk_no; + unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); + block_t cp_blk_no; + int i; - sbi->ckpt = kzalloc(blk_size, GFP_KERNEL); + sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL); if (!sbi->ckpt) return -ENOMEM; /* @@ -543,6 +546,23 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) cp_block = (struct f2fs_checkpoint *)page_address(cur_page); memcpy(sbi->ckpt, cp_block, blk_size); + if (cp_blks <= 1) + goto done; + + cp_blk_no = le32_to_cpu(fsb->cp_blkaddr); + if (cur_page == cp2) + cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg); + + for (i = 1; i < cp_blks; i++) { + void *sit_bitmap_ptr; + unsigned char *ckpt = (unsigned char *)sbi->ckpt; + + cur_page = get_meta_page(sbi, cp_blk_no + i); + sit_bitmap_ptr = page_address(cur_page); + memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size); + f2fs_put_page(cur_page, 1); + } +done: f2fs_put_page(cp1, 1); f2fs_put_page(cp2, 1); return 0; @@ -555,14 +575,13 @@ fail_no_cp: static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct list_head *head = &sbi->dir_inode_list; - struct dir_inode_entry *entry; - list_for_each_entry(entry, head, list) - if (unlikely(entry->inode == inode)) - return -EEXIST; + if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) + return -EEXIST; - list_add_tail(&new->list, head); + set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR); + F2FS_I(inode)->dirty_dir = new; + list_add_tail(&new->list, &sbi->dir_inode_list); stat_inc_dirty_dir(sbi); return 0; } @@ -611,31 +630,26 @@ void add_dirty_dir_inode(struct inode *inode) void remove_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct list_head *head; struct dir_inode_entry *entry; if (!S_ISDIR(inode->i_mode)) return; spin_lock(&sbi->dir_inode_lock); - if (get_dirty_dents(inode)) { + if (get_dirty_dents(inode) || + !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) { spin_unlock(&sbi->dir_inode_lock); return; } - head = &sbi->dir_inode_list; - list_for_each_entry(entry, head, list) { - if (entry->inode == inode) { - list_del(&entry->list); - stat_dec_dirty_dir(sbi); - spin_unlock(&sbi->dir_inode_lock); - kmem_cache_free(inode_entry_slab, entry); - goto done; - } - } + entry = F2FS_I(inode)->dirty_dir; + list_del(&entry->list); + F2FS_I(inode)->dirty_dir = NULL; + clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR); + stat_dec_dirty_dir(sbi); spin_unlock(&sbi->dir_inode_lock); + kmem_cache_free(inode_entry_slab, entry); -done: /* Only from the recovery routine */ if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); @@ -643,26 +657,6 @@ done: } } -struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) -{ - - struct list_head *head; - struct inode *inode = NULL; - struct dir_inode_entry *entry; - - spin_lock(&sbi->dir_inode_lock); - - head = &sbi->dir_inode_list; - list_for_each_entry(entry, head, list) { - if (entry->inode->i_ino == ino) { - inode = entry->inode; - break; - } - } - spin_unlock(&sbi->dir_inode_lock); - return inode; -} - void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) { struct list_head *head; @@ -761,6 +755,13 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) __u32 crc32 = 0; void *kaddr; int i; + int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); + + /* + * This avoids to conduct wrong roll-forward operations and uses + * metapages, so should be called prior to sync_meta_pages below. + */ + discard_next_dnode(sbi); /* Flush all the NAT/SIT pages */ while (get_pages(sbi, F2FS_DIRTY_META)) @@ -805,16 +806,19 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1) / F2FS_ORPHANS_PER_BLOCK; - ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks); + ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + + orphan_blocks); if (is_umount) { set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); ckpt->cp_pack_total_block_count = cpu_to_le32(2 + - data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); + cp_payload_blks + data_sum_blocks + + orphan_blocks + NR_CURSEG_NODE_TYPE); } else { clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); ckpt->cp_pack_total_block_count = cpu_to_le32(2 + - data_sum_blocks + orphan_blocks); + cp_payload_blks + data_sum_blocks + + orphan_blocks); } if (sbi->n_orphans) @@ -840,6 +844,15 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) set_page_dirty(cp_page); f2fs_put_page(cp_page, 1); + for (i = 1; i < 1 + cp_payload_blks; i++) { + cp_page = grab_meta_page(sbi, start_blk++); + kaddr = page_address(cp_page); + memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, + (1 << sbi->log_blocksize)); + set_page_dirty(cp_page); + f2fs_put_page(cp_page, 1); + } + if (sbi->n_orphans) { write_orphan_inodes(sbi, start_blk); start_blk += orphan_blocks; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 45abd60e2bff..c1fb6dd10911 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -417,7 +417,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) if (unlikely(dn.data_blkaddr == NEW_ADDR)) return ERR_PTR(-EINVAL); - page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); + page = grab_cache_page(mapping, index); if (!page) return ERR_PTR(-ENOMEM); @@ -455,7 +455,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) int err; repeat: - page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); + page = grab_cache_page(mapping, index); if (!page) return ERR_PTR(-ENOMEM); @@ -652,8 +652,7 @@ static int get_data_block(struct inode *inode, sector_t iblock, goto put_out; } - end_offset = IS_INODE(dn.node_page) ? - ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; + end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); bh_result->b_size = (((size_t)1) << blkbits); dn.ofs_in_node++; pgofs++; @@ -675,8 +674,7 @@ get_next: if (dn.data_blkaddr == NEW_ADDR) goto put_out; - end_offset = IS_INODE(dn.node_page) ? - ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; + end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); } if (maxblocks > (bh_result->b_size >> blkbits)) { @@ -710,11 +708,19 @@ out: return err; } +int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len) +{ + return generic_block_fiemap(inode, fieinfo, start, len, get_data_block); +} + static int f2fs_read_data_page(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; int ret; + trace_f2fs_readpage(page, DATA); + /* If the file has inline data, try to read it directlly */ if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); @@ -790,6 +796,8 @@ static int f2fs_write_data_page(struct page *page, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, }; + trace_f2fs_writepage(page, DATA); + if (page->index < end_index) goto write; @@ -798,10 +806,8 @@ static int f2fs_write_data_page(struct page *page, * this page does not have to be written to disk. */ offset = i_size & (PAGE_CACHE_SIZE - 1); - if ((page->index >= end_index + 1) || !offset) { - inode_dec_dirty_dents(inode); + if ((page->index >= end_index + 1) || !offset) goto out; - } zero_user_segment(page, offset, PAGE_CACHE_SIZE); write: @@ -810,7 +816,6 @@ write: /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { - inode_dec_dirty_dents(inode); err = do_write_data_page(page, &fio); goto done; } @@ -832,15 +837,16 @@ done: clear_cold_data(page); out: + inode_dec_dirty_dents(inode); unlock_page(page); if (need_balance_fs) f2fs_balance_fs(sbi); + if (wbc->for_reclaim) + f2fs_submit_merged_bio(sbi, DATA, WRITE); return 0; redirty_out: - wbc->pages_skipped++; - account_page_redirty(page); - set_page_dirty(page); + redirty_page_for_writepage(wbc, page); return AOP_WRITEPAGE_ACTIVATE; } @@ -862,12 +868,15 @@ static int f2fs_write_data_pages(struct address_space *mapping, int ret; long diff; + trace_f2fs_writepages(mapping->host, wbc, DATA); + /* deal with chardevs and other special file */ if (!mapping->a_ops->writepage) return 0; if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && - get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA)) + get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA) && + available_free_memory(sbi, DIRTY_DENTS)) goto skip_write; diff = nr_pages_to_write(sbi, DATA, wbc); @@ -903,6 +912,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, struct dnode_of_data dn; int err = 0; + trace_f2fs_write_begin(inode, pos, len, flags); + f2fs_balance_fs(sbi); repeat: err = f2fs_convert_inline_data(inode, pos + len); @@ -912,6 +923,10 @@ repeat: page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; + + /* to avoid latency during memory pressure */ + unlock_page(page); + *pagep = page; if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA) @@ -923,10 +938,18 @@ repeat: f2fs_unlock_op(sbi); if (err) { - f2fs_put_page(page, 1); + f2fs_put_page(page, 0); return err; } inline_data: + lock_page(page); + if (unlikely(page->mapping != mapping)) { + f2fs_put_page(page, 1); + goto repeat; + } + + f2fs_wait_on_page_writeback(page, DATA); + if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) return 0; @@ -978,6 +1001,8 @@ static int f2fs_write_end(struct file *file, { struct inode *inode = page->mapping->host; + trace_f2fs_write_end(inode, pos, len, copied); + SetPageUptodate(page); set_page_dirty(page); @@ -1022,6 +1047,9 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, if (check_direct_IO(inode, rw, iov, offset, nr_segs)) return 0; + /* clear fsync mark to recover these blocks */ + fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino); + return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, get_data_block); } @@ -1061,6 +1089,11 @@ static int f2fs_set_data_page_dirty(struct page *page) static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) { + struct inode *inode = mapping->host; + + if (f2fs_has_inline_data(inode)) + return 0; + return generic_block_bmap(mapping, block, get_data_block); } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 972fd0ef230f..966acb039e3b 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -23,10 +23,10 @@ static unsigned long dir_blocks(struct inode *inode) static unsigned int dir_buckets(unsigned int level, int dir_level) { - if (level < MAX_DIR_HASH_DEPTH / 2) + if (level + dir_level < MAX_DIR_HASH_DEPTH / 2) return 1 << (level + dir_level); else - return 1 << ((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1); + return MAX_DIR_BUCKETS; } static unsigned int bucket_blocks(unsigned int level) @@ -268,6 +268,8 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) { struct f2fs_inode *ri; + f2fs_wait_on_page_writeback(ipage, NODE); + /* copy name info. to this inode page */ ri = F2FS_INODE(ipage); ri->i_namelen = cpu_to_le32(name->len); @@ -637,11 +639,17 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) struct f2fs_dentry_block *dentry_blk = NULL; struct f2fs_dir_entry *de = NULL; struct page *dentry_page = NULL; + struct file_ra_state *ra = &file->f_ra; unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); unsigned char d_type = DT_UNKNOWN; bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK); + /* readahead for multi pages of dir */ + if (npages - n > 1 && !ra_has_index(ra, n)) + page_cache_sync_readahead(inode->i_mapping, ra, file, n, + min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES)); + for (; n < npages; n++) { dentry_page = get_lock_data_page(inode, n); if (IS_ERR(dentry_page)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2ecac8312359..e51c732b0dd9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -182,6 +182,8 @@ enum { #define F2FS_LINK_MAX 32000 /* maximum link count per file */ +#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ + /* for in-memory extent cache entry */ #define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */ @@ -218,6 +220,7 @@ struct f2fs_inode_info { nid_t i_xattr_nid; /* node id that contains xattrs */ unsigned long long xattr_ver; /* cp version of xattr modification */ struct extent_info ext; /* in-memory extent cache entry */ + struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ }; static inline void get_extent_info(struct extent_info *ext, @@ -243,6 +246,7 @@ static inline void set_raw_extent(struct extent_info *ext, struct f2fs_nm_info { block_t nat_blkaddr; /* base disk address of NAT */ nid_t max_nid; /* maximum possible node ids */ + nid_t available_nids; /* maximum available node ids */ nid_t next_scan_nid; /* the next nid to be scanned */ unsigned int ram_thresh; /* control the memory footprint */ @@ -323,6 +327,15 @@ struct flush_cmd { int ret; }; +struct flush_cmd_control { + struct task_struct *f2fs_issue_flush; /* flush thread */ + wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ + struct flush_cmd *issue_list; /* list for command issue */ + struct flush_cmd *dispatch_list; /* list for command dispatch */ + spinlock_t issue_lock; /* for issue list lock */ + struct flush_cmd *issue_tail; /* list tail of issue list */ +}; + struct f2fs_sm_info { struct sit_info *sit_info; /* whole segment information */ struct free_segmap_info *free_info; /* free segment information */ @@ -353,12 +366,8 @@ struct f2fs_sm_info { unsigned int min_ipu_util; /* in-place-update threshold */ /* for flush command control */ - struct task_struct *f2fs_issue_flush; /* flush thread */ - wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ - struct flush_cmd *issue_list; /* list for command issue */ - struct flush_cmd *dispatch_list; /* list for command dispatch */ - spinlock_t issue_lock; /* for issue list lock */ - struct flush_cmd *issue_tail; /* list tail of issue list */ + struct flush_cmd_control *cmd_control_info; + }; /* @@ -755,9 +764,18 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - int offset = (flag == NAT_BITMAP) ? + int offset; + + if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) { + if (flag == NAT_BITMAP) + return &ckpt->sit_nat_version_bitmap; + else + return ((unsigned char *)ckpt + F2FS_BLKSIZE); + } else { + offset = (flag == NAT_BITMAP) ? le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; - return &ckpt->sit_nat_version_bitmap + offset; + return &ckpt->sit_nat_version_bitmap + offset; + } } static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) @@ -958,6 +976,7 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr) enum { FI_NEW_INODE, /* indicate newly allocated inode */ FI_DIRTY_INODE, /* indicate inode is dirty or not */ + FI_DIRTY_DIR, /* indicate directory has dirty pages */ FI_INC_LINK, /* need to increment i_nlink */ FI_ACL_MODE, /* indicate acl mode */ FI_NO_ALLOC, /* should not allocate any blocks */ @@ -1071,6 +1090,12 @@ static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) +/* get offset of first page in next direct node */ +#define PGOFS_OF_NEXT_DNODE(pgofs, fi) \ + ((pgofs < ADDRS_PER_INODE(fi)) ? ADDRS_PER_INODE(fi) : \ + (pgofs - ADDRS_PER_INODE(fi) + ADDRS_PER_BLOCK) / \ + ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi)) + /* * file.c */ @@ -1140,8 +1165,10 @@ f2fs_hash_t f2fs_dentry_hash(const char *, size_t); struct dnode_of_data; struct node_info; +bool available_free_memory(struct f2fs_sb_info *, int); int is_checkpointed_node(struct f2fs_sb_info *, nid_t); bool fsync_mark_done(struct f2fs_sb_info *, nid_t); +void fsync_mark_clear(struct f2fs_sb_info *, nid_t); void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); int truncate_inode_blocks(struct inode *, pgoff_t); @@ -1176,9 +1203,12 @@ void destroy_node_manager_caches(void); void f2fs_balance_fs(struct f2fs_sb_info *); void f2fs_balance_fs_bg(struct f2fs_sb_info *); int f2fs_issue_flush(struct f2fs_sb_info *); +int create_flush_cmd_control(struct f2fs_sb_info *); +void destroy_flush_cmd_control(struct f2fs_sb_info *); void invalidate_blocks(struct f2fs_sb_info *, block_t); void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); void clear_prefree_segments(struct f2fs_sb_info *); +void discard_next_dnode(struct f2fs_sb_info *); int npages_for_summary_flush(struct f2fs_sb_info *); void allocate_new_segments(struct f2fs_sb_info *); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); @@ -1221,7 +1251,6 @@ int get_valid_checkpoint(struct f2fs_sb_info *); void set_dirty_dir_page(struct inode *, struct page *); void add_dirty_dir_inode(struct inode *); void remove_dirty_dir_inode(struct inode *); -struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t); void sync_dirty_dir_inodes(struct f2fs_sb_info *); void write_checkpoint(struct f2fs_sb_info *, bool); void init_orphan_info(struct f2fs_sb_info *); @@ -1242,6 +1271,7 @@ struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int do_write_data_page(struct page *, struct f2fs_io_info *); +int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); /* * gc.c @@ -1391,5 +1421,6 @@ bool f2fs_may_inline(struct inode *); int f2fs_read_inline_data(struct inode *, struct page *); int f2fs_convert_inline_data(struct inode *, pgoff_t); int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); +void truncate_inline_data(struct inode *, u64); int recover_inline_data(struct inode *, struct page *); #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 60e7d5448a1d..9c49c593d8eb 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -19,6 +19,7 @@ #include <linux/compat.h> #include <linux/uaccess.h> #include <linux/mount.h> +#include <linux/pagevec.h> #include "f2fs.h" #include "node.h" @@ -194,6 +195,132 @@ out: return ret; } +static pgoff_t __get_first_dirty_index(struct address_space *mapping, + pgoff_t pgofs, int whence) +{ + struct pagevec pvec; + int nr_pages; + + if (whence != SEEK_DATA) + return 0; + + /* find first dirty page index */ + pagevec_init(&pvec, 0); + nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1); + pgofs = nr_pages ? pvec.pages[0]->index: LONG_MAX; + pagevec_release(&pvec); + return pgofs; +} + +static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs, + int whence) +{ + switch (whence) { + case SEEK_DATA: + if ((blkaddr == NEW_ADDR && dirty == pgofs) || + (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR)) + return true; + break; + case SEEK_HOLE: + if (blkaddr == NULL_ADDR) + return true; + break; + } + return false; +} + +static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) +{ + struct inode *inode = file->f_mapping->host; + loff_t maxbytes = inode->i_sb->s_maxbytes; + struct dnode_of_data dn; + pgoff_t pgofs, end_offset, dirty; + loff_t data_ofs = offset; + loff_t isize; + int err = 0; + + mutex_lock(&inode->i_mutex); + + isize = i_size_read(inode); + if (offset >= isize) + goto fail; + + /* handle inline data case */ + if (f2fs_has_inline_data(inode)) { + if (whence == SEEK_HOLE) + data_ofs = isize; + goto found; + } + + pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT); + + dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence); + + for (; data_ofs < isize; data_ofs = pgofs << PAGE_CACHE_SHIFT) { + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); + if (err && err != -ENOENT) { + goto fail; + } else if (err == -ENOENT) { + /* direct node is not exist */ + if (whence == SEEK_DATA) { + pgofs = PGOFS_OF_NEXT_DNODE(pgofs, + F2FS_I(inode)); + continue; + } else { + goto found; + } + } + + end_offset = IS_INODE(dn.node_page) ? + ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; + + /* find data/hole in dnode block */ + for (; dn.ofs_in_node < end_offset; + dn.ofs_in_node++, pgofs++, + data_ofs = pgofs << PAGE_CACHE_SHIFT) { + block_t blkaddr; + blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + + if (__found_offset(blkaddr, dirty, pgofs, whence)) { + f2fs_put_dnode(&dn); + goto found; + } + } + f2fs_put_dnode(&dn); + } + + if (whence == SEEK_DATA) + goto fail; +found: + if (whence == SEEK_HOLE && data_ofs > isize) + data_ofs = isize; + mutex_unlock(&inode->i_mutex); + return vfs_setpos(file, data_ofs, maxbytes); +fail: + mutex_unlock(&inode->i_mutex); + return -ENXIO; +} + +static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) +{ + struct inode *inode = file->f_mapping->host; + loff_t maxbytes = inode->i_sb->s_maxbytes; + + switch (whence) { + case SEEK_SET: + case SEEK_CUR: + case SEEK_END: + return generic_file_llseek_size(file, offset, whence, + maxbytes, i_size_read(inode)); + case SEEK_DATA: + case SEEK_HOLE: + return f2fs_seek_block(file, offset, whence); + } + + return -EINVAL; +} + static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) { file_accessed(file); @@ -242,6 +369,9 @@ static void truncate_partial_data_page(struct inode *inode, u64 from) unsigned offset = from & (PAGE_CACHE_SIZE - 1); struct page *page; + if (f2fs_has_inline_data(inode)) + return truncate_inline_data(inode, from); + if (!offset) return; @@ -288,10 +418,7 @@ int truncate_blocks(struct inode *inode, u64 from) return err; } - if (IS_INODE(dn.node_page)) - count = ADDRS_PER_INODE(F2FS_I(inode)); - else - count = ADDRS_PER_BLOCK; + count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); count -= dn.ofs_in_node; f2fs_bug_on(count < 0); @@ -413,6 +540,7 @@ const struct inode_operations f2fs_file_inode_operations = { .listxattr = f2fs_listxattr, .removexattr = generic_removexattr, #endif + .fiemap = f2fs_fiemap, }; static void fill_zero(struct inode *inode, pgoff_t index, @@ -555,6 +683,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, i_size_read(inode) < new_size) { i_size_write(inode, new_size); mark_inode_dirty(inode); + f2fs_write_inode(inode, NULL); } return ret; @@ -678,7 +807,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) #endif const struct file_operations f2fs_file_operations = { - .llseek = generic_file_llseek, + .llseek = f2fs_llseek, .read = do_sync_read, .write = do_sync_write, .aio_read = generic_file_aio_read, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 383db1fabcf4..1bba5228c197 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -81,8 +81,10 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) f2fs_lock_op(sbi); ipage = get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto out; + } /* * i_addr[0] is not used for inline data, @@ -90,11 +92,10 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) */ set_new_dnode(&dn, inode, ipage, NULL, 0); err = f2fs_reserve_block(&dn, 0); - if (err) { - f2fs_unlock_op(sbi); - return err; - } + if (err) + goto out; + f2fs_wait_on_page_writeback(page, DATA); zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); /* Copy the whole inline data block */ @@ -118,6 +119,7 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) sync_inode_page(&dn); f2fs_put_dnode(&dn); +out: f2fs_unlock_op(sbi); return err; } @@ -132,7 +134,7 @@ int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size) else if (to_size <= MAX_INLINE_DATA) return 0; - page = grab_cache_page_write_begin(inode->i_mapping, 0, AOP_FLAG_NOFS); + page = grab_cache_page(inode->i_mapping, 0); if (!page) return -ENOMEM; @@ -155,6 +157,7 @@ int f2fs_write_inline_data(struct inode *inode, return err; ipage = dn.inode_page; + f2fs_wait_on_page_writeback(ipage, NODE); zero_user_segment(ipage, INLINE_DATA_OFFSET, INLINE_DATA_OFFSET + MAX_INLINE_DATA); src_addr = kmap(page); @@ -175,6 +178,26 @@ int f2fs_write_inline_data(struct inode *inode, return 0; } +void truncate_inline_data(struct inode *inode, u64 from) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *ipage; + + if (from >= MAX_INLINE_DATA) + return; + + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) + return; + + f2fs_wait_on_page_writeback(ipage, NODE); + + zero_user_segment(ipage, INLINE_DATA_OFFSET + from, + INLINE_DATA_OFFSET + MAX_INLINE_DATA); + set_page_dirty(ipage); + f2fs_put_page(ipage, 1); +} + int recover_inline_data(struct inode *inode, struct page *npage) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); @@ -199,6 +222,8 @@ process_inline: ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(IS_ERR(ipage)); + f2fs_wait_on_page_writeback(ipage, NODE); + src_addr = inline_data_addr(npage); dst_addr = inline_data_addr(ipage); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); @@ -210,6 +235,7 @@ process_inline: if (f2fs_has_inline_data(inode)) { ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(IS_ERR(ipage)); + f2fs_wait_on_page_writeback(ipage, NODE); zero_user_segment(ipage, INLINE_DATA_OFFSET, INLINE_DATA_OFFSET + MAX_INLINE_DATA); clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ee829d360468..adc622c6bdce 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -12,6 +12,7 @@ #include <linux/f2fs_fs.h> #include <linux/buffer_head.h> #include <linux/writeback.h> +#include <linux/bitops.h> #include "f2fs.h" #include "node.h" @@ -21,20 +22,20 @@ void f2fs_set_inode_flags(struct inode *inode) { unsigned int flags = F2FS_I(inode)->i_flags; - - inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | - S_NOATIME | S_DIRSYNC); + unsigned int new_fl = 0; if (flags & FS_SYNC_FL) - inode->i_flags |= S_SYNC; + new_fl |= S_SYNC; if (flags & FS_APPEND_FL) - inode->i_flags |= S_APPEND; + new_fl |= S_APPEND; if (flags & FS_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; + new_fl |= S_IMMUTABLE; if (flags & FS_NOATIME_FL) - inode->i_flags |= S_NOATIME; + new_fl |= S_NOATIME; if (flags & FS_DIRSYNC_FL) - inode->i_flags |= S_DIRSYNC; + new_fl |= S_DIRSYNC; + set_mask_bits(&inode->i_flags, + S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); } static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) @@ -294,4 +295,5 @@ void f2fs_evict_inode(struct inode *inode) sb_end_intwrite(inode->i_sb); no_delete: clear_inode(inode); + invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a9409d19dfd4..9138c32aa698 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -41,18 +41,9 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) } f2fs_unlock_op(sbi); - inode->i_uid = current_fsuid(); - - if (dir->i_mode & S_ISGID) { - inode->i_gid = dir->i_gid; - if (S_ISDIR(mode)) - mode |= S_ISGID; - } else { - inode->i_gid = current_fsgid(); - } + inode_init_owner(inode, dir, mode); inode->i_ino = ino; - inode->i_mode = mode; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_generation = sbi->s_next_generation++; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index a161e955c4c8..9dfb9a042fd2 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -26,20 +26,26 @@ static struct kmem_cache *nat_entry_slab; static struct kmem_cache *free_nid_slab; -static inline bool available_free_memory(struct f2fs_nm_info *nm_i, int type) +bool available_free_memory(struct f2fs_sb_info *sbi, int type) { + struct f2fs_nm_info *nm_i = NM_I(sbi); struct sysinfo val; unsigned long mem_size = 0; + bool res = false; si_meminfo(&val); - if (type == FREE_NIDS) - mem_size = nm_i->fcnt * sizeof(struct free_nid); - else if (type == NAT_ENTRIES) - mem_size += nm_i->nat_cnt * sizeof(struct nat_entry); - mem_size >>= 12; - - /* give 50:50 memory for free nids and nat caches respectively */ - return (mem_size < ((val.totalram * nm_i->ram_thresh) >> 11)); + /* give 25%, 25%, 50% memory for each components respectively */ + if (type == FREE_NIDS) { + mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12; + res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + } else if (type == NAT_ENTRIES) { + mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12; + res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + } else if (type == DIRTY_DENTS) { + mem_size = get_pages(sbi, F2FS_DIRTY_DENTS); + res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1); + } + return res; } static void clear_node_page_dirty(struct page *page) @@ -147,6 +153,18 @@ bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid) return fsync_done; } +void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct nat_entry *e; + + write_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, nid); + if (e) + e->fsync_done = false; + write_unlock(&nm_i->nat_tree_lock); +} + static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) { struct nat_entry *new; @@ -179,9 +197,7 @@ retry: write_unlock(&nm_i->nat_tree_lock); goto retry; } - nat_set_blkaddr(e, le32_to_cpu(ne->block_addr)); - nat_set_ino(e, le32_to_cpu(ne->ino)); - nat_set_version(e, ne->version); + node_info_from_raw_nat(&e->ni, ne); } write_unlock(&nm_i->nat_tree_lock); } @@ -243,7 +259,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) { struct f2fs_nm_info *nm_i = NM_I(sbi); - if (available_free_memory(nm_i, NAT_ENTRIES)) + if (available_free_memory(sbi, NAT_ENTRIES)) return 0; write_lock(&nm_i->nat_tree_lock); @@ -849,8 +865,7 @@ struct page *new_node_page(struct dnode_of_data *dn, if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) return ERR_PTR(-EPERM); - page = grab_cache_page_write_begin(NODE_MAPPING(sbi), - dn->nid, AOP_FLAG_NOFS); + page = grab_cache_page(NODE_MAPPING(sbi), dn->nid); if (!page) return ERR_PTR(-ENOMEM); @@ -867,6 +882,7 @@ struct page *new_node_page(struct dnode_of_data *dn, new_ni.ino = dn->inode->i_ino; set_node_addr(sbi, &new_ni, NEW_ADDR, false); + f2fs_wait_on_page_writeback(page, NODE); fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); set_cold_node(dn->inode, page); SetPageUptodate(page); @@ -946,8 +962,7 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) struct page *page; int err; repeat: - page = grab_cache_page_write_begin(NODE_MAPPING(sbi), - nid, AOP_FLAG_NOFS); + page = grab_cache_page(NODE_MAPPING(sbi), nid); if (!page) return ERR_PTR(-ENOMEM); @@ -967,7 +982,6 @@ repeat: goto repeat; } got_it: - mark_page_accessed(page); return page; } @@ -1022,7 +1036,6 @@ page_hit: f2fs_put_page(page, 1); return ERR_PTR(-EIO); } - mark_page_accessed(page); return page; } @@ -1196,6 +1209,8 @@ static int f2fs_write_node_page(struct page *page, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, }; + trace_f2fs_writepage(page, NODE); + if (unlikely(sbi->por_doing)) goto redirty_out; @@ -1227,10 +1242,7 @@ static int f2fs_write_node_page(struct page *page, return 0; redirty_out: - dec_page_count(sbi, F2FS_DIRTY_NODES); - wbc->pages_skipped++; - account_page_redirty(page); - set_page_dirty(page); + redirty_page_for_writepage(wbc, page); return AOP_WRITEPAGE_ACTIVATE; } @@ -1240,6 +1252,8 @@ static int f2fs_write_node_pages(struct address_space *mapping, struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); long diff; + trace_f2fs_writepages(mapping->host, wbc, NODE); + /* balancing f2fs's metadata in background */ f2fs_balance_fs_bg(sbi); @@ -1315,13 +1329,14 @@ static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i, radix_tree_delete(&nm_i->free_nid_root, i->nid); } -static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) +static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) { + struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i; struct nat_entry *ne; bool allocated = false; - if (!available_free_memory(nm_i, FREE_NIDS)) + if (!available_free_memory(sbi, FREE_NIDS)) return -1; /* 0 nid should not be used */ @@ -1374,9 +1389,10 @@ static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) kmem_cache_free(free_nid_slab, i); } -static void scan_nat_page(struct f2fs_nm_info *nm_i, +static void scan_nat_page(struct f2fs_sb_info *sbi, struct page *nat_page, nid_t start_nid) { + struct f2fs_nm_info *nm_i = NM_I(sbi); struct f2fs_nat_block *nat_blk = page_address(nat_page); block_t blk_addr; int i; @@ -1391,7 +1407,7 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i, blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); f2fs_bug_on(blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) { - if (add_free_nid(nm_i, start_nid, true) < 0) + if (add_free_nid(sbi, start_nid, true) < 0) break; } } @@ -1415,7 +1431,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) while (1) { struct page *page = get_current_nat_page(sbi, nid); - scan_nat_page(nm_i, page, nid); + scan_nat_page(sbi, page, nid); f2fs_put_page(page, 1); nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); @@ -1435,7 +1451,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr); nid = le32_to_cpu(nid_in_journal(sum, i)); if (addr == NULL_ADDR) - add_free_nid(nm_i, nid, true); + add_free_nid(sbi, nid, true); else remove_free_nid(nm_i, nid); } @@ -1452,7 +1468,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i = NULL; retry: - if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid)) + if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids)) return false; spin_lock(&nm_i->free_nid_list_lock); @@ -1512,7 +1528,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->free_nid_list_lock); i = __lookup_free_nid_list(nm_i, nid); f2fs_bug_on(!i || i->state != NID_ALLOC); - if (!available_free_memory(nm_i, FREE_NIDS)) { + if (!available_free_memory(sbi, FREE_NIDS)) { __del_from_free_nid_list(nm_i, i); need_free = true; } else { @@ -1534,7 +1550,7 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, clear_node_page_dirty(page); } -void recover_inline_xattr(struct inode *inode, struct page *page) +static void recover_inline_xattr(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); void *src_addr, *dst_addr; @@ -1559,6 +1575,7 @@ void recover_inline_xattr(struct inode *inode, struct page *page) src_addr = inline_xattr_addr(page); inline_size = inline_xattr_size(inode); + f2fs_wait_on_page_writeback(ipage, NODE); memcpy(dst_addr, src_addr, inline_size); update_inode(inode, ipage); @@ -1614,6 +1631,11 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) struct node_info old_ni, new_ni; struct page *ipage; + get_node_info(sbi, ino, &old_ni); + + if (unlikely(old_ni.blk_addr != NULL_ADDR)) + return -EINVAL; + ipage = grab_cache_page(NODE_MAPPING(sbi), ino); if (!ipage) return -ENOMEM; @@ -1621,7 +1643,6 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) /* Should not use this inode from free nid list */ remove_free_nid(NM_I(sbi), ino); - get_node_info(sbi, ino, &old_ni); SetPageUptodate(ipage); fill_node_footer(ipage, ino, ino, 0, true); @@ -1647,35 +1668,29 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) /* * ra_sum_pages() merge contiguous pages into one bio and submit. - * these pre-readed pages are linked in pages list. + * these pre-readed pages are alloced in bd_inode's mapping tree. */ -static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages, +static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, int start, int nrpages) { - struct page *page; - int page_idx = start; + struct inode *inode = sbi->sb->s_bdev->bd_inode; + struct address_space *mapping = inode->i_mapping; + int i, page_idx = start; struct f2fs_io_info fio = { .type = META, .rw = READ_SYNC | REQ_META | REQ_PRIO }; - for (; page_idx < start + nrpages; page_idx++) { - /* alloc temporal page for read node summary info*/ - page = alloc_page(GFP_F2FS_ZERO); - if (!page) + for (i = 0; page_idx < start + nrpages; page_idx++, i++) { + /* alloc page in bd_inode for reading node summary info */ + pages[i] = grab_cache_page(mapping, page_idx); + if (!pages[i]) break; - - lock_page(page); - page->index = page_idx; - list_add_tail(&page->lru, pages); + f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio); } - list_for_each_entry(page, pages, lru) - f2fs_submit_page_mbio(sbi, page, page->index, &fio); - f2fs_submit_merged_bio(sbi, META, READ); - - return page_idx - start; + return i; } int restore_node_summary(struct f2fs_sb_info *sbi, @@ -1683,11 +1698,11 @@ int restore_node_summary(struct f2fs_sb_info *sbi, { struct f2fs_node *rn; struct f2fs_summary *sum_entry; - struct page *page, *tmp; + struct inode *inode = sbi->sb->s_bdev->bd_inode; block_t addr; int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); - int i, last_offset, nrpages, err = 0; - LIST_HEAD(page_list); + struct page *pages[bio_blocks]; + int i, idx, last_offset, nrpages, err = 0; /* scan the node segment */ last_offset = sbi->blocks_per_seg; @@ -1698,29 +1713,31 @@ int restore_node_summary(struct f2fs_sb_info *sbi, nrpages = min(last_offset - i, bio_blocks); /* read ahead node pages */ - nrpages = ra_sum_pages(sbi, &page_list, addr, nrpages); + nrpages = ra_sum_pages(sbi, pages, addr, nrpages); if (!nrpages) return -ENOMEM; - list_for_each_entry_safe(page, tmp, &page_list, lru) { + for (idx = 0; idx < nrpages; idx++) { if (err) goto skip; - lock_page(page); - if (unlikely(!PageUptodate(page))) { + lock_page(pages[idx]); + if (unlikely(!PageUptodate(pages[idx]))) { err = -EIO; } else { - rn = F2FS_NODE(page); + rn = F2FS_NODE(pages[idx]); sum_entry->nid = rn->footer.nid; sum_entry->version = 0; sum_entry->ofs_in_node = 0; sum_entry++; } - unlock_page(page); + unlock_page(pages[idx]); skip: - list_del(&page->lru); - __free_pages(page, 0); + page_cache_release(pages[idx]); } + + invalidate_mapping_pages(inode->i_mapping, addr, + addr + nrpages); } return err; } @@ -1758,9 +1775,7 @@ retry: write_unlock(&nm_i->nat_tree_lock); goto retry; } - nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr)); - nat_set_ino(ne, le32_to_cpu(raw_ne.ino)); - nat_set_version(ne, raw_ne.version); + node_info_from_raw_nat(&ne->ni, &raw_ne); __set_nat_cache_dirty(nm_i, ne); write_unlock(&nm_i->nat_tree_lock); } @@ -1793,7 +1808,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) nid_t nid; struct f2fs_nat_entry raw_ne; int offset = -1; - block_t new_blkaddr; if (nat_get_blkaddr(ne) == NEW_ADDR) continue; @@ -1829,11 +1843,7 @@ to_nat_page: f2fs_bug_on(!nat_blk); raw_ne = nat_blk->entries[nid - start_nid]; flush_now: - new_blkaddr = nat_get_blkaddr(ne); - - raw_ne.ino = cpu_to_le32(nat_get_ino(ne)); - raw_ne.block_addr = cpu_to_le32(new_blkaddr); - raw_ne.version = nat_get_version(ne); + raw_nat_from_node_info(&raw_ne, &ne->ni); if (offset < 0) { nat_blk->entries[nid - start_nid] = raw_ne; @@ -1843,7 +1853,7 @@ flush_now: } if (nat_get_blkaddr(ne) == NULL_ADDR && - add_free_nid(NM_I(sbi), nid, false) <= 0) { + add_free_nid(sbi, nid, false) <= 0) { write_lock(&nm_i->nat_tree_lock); __del_from_nat_cache(nm_i, ne); write_unlock(&nm_i->nat_tree_lock); @@ -1871,8 +1881,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi) nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); + nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; + /* not used nids: 0, node, meta, (and root counted as valid node) */ - nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks - 3; + nm_i->available_nids = nm_i->max_nid - 3; nm_i->fcnt = 0; nm_i->nat_cnt = 0; nm_i->ram_thresh = DEF_RAM_THRESHOLD; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 5decc1a375f0..7281112cd1c8 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -59,12 +59,12 @@ struct nat_entry { do { \ ne->checkpointed = false; \ list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \ - } while (0); + } while (0) #define __clear_nat_cache_dirty(nm_i, ne) \ do { \ ne->checkpointed = true; \ list_move_tail(&ne->list, &nm_i->nat_entries); \ - } while (0); + } while (0) #define inc_node_version(version) (++version) static inline void node_info_from_raw_nat(struct node_info *ni, @@ -75,9 +75,18 @@ static inline void node_info_from_raw_nat(struct node_info *ni, ni->version = raw_ne->version; } -enum nid_type { +static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne, + struct node_info *ni) +{ + raw_ne->ino = cpu_to_le32(ni->ino); + raw_ne->block_addr = cpu_to_le32(ni->blk_addr); + raw_ne->version = ni->version; +} + +enum mem_type { FREE_NIDS, /* indicates the free nid list */ - NAT_ENTRIES /* indicates the cached nat entry */ + NAT_ENTRIES, /* indicates the cached nat entry */ + DIRTY_DENTS /* indicates dirty dentry pages */ }; /* @@ -263,7 +272,7 @@ static inline void set_nid(struct page *p, int off, nid_t nid, bool i) { struct f2fs_node *rn = F2FS_NODE(p); - wait_on_page_writeback(p); + f2fs_wait_on_page_writeback(p, NODE); if (i) rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b1ae89f0f44e..a112368a4a86 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -46,15 +46,10 @@ static int recover_dentry(struct page *ipage, struct inode *inode) struct inode *dir, *einode; int err = 0; - dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino); - if (!dir) { - dir = f2fs_iget(inode->i_sb, pino); - if (IS_ERR(dir)) { - err = PTR_ERR(dir); - goto out; - } - set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); - add_dirty_dir_inode(dir); + dir = f2fs_iget(inode->i_sb, pino); + if (IS_ERR(dir)) { + err = PTR_ERR(dir); + goto out; } name.len = le32_to_cpu(raw_inode->i_namelen); @@ -63,7 +58,7 @@ static int recover_dentry(struct page *ipage, struct inode *inode) if (unlikely(name.len > F2FS_NAME_LEN)) { WARN_ON(1); err = -ENAMETOOLONG; - goto out; + goto out_err; } retry: de = f2fs_find_entry(dir, &name, &page); @@ -73,7 +68,8 @@ retry: einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); if (IS_ERR(einode)) { WARN_ON(1); - if (PTR_ERR(einode) == -ENOENT) + err = PTR_ERR(einode); + if (err == -ENOENT) err = -EEXIST; goto out_unmap_put; } @@ -87,11 +83,23 @@ retry: goto retry; } err = __f2fs_add_link(dir, &name, inode); + if (err) + goto out_err; + + if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) { + iput(dir); + } else { + add_dirty_dir_inode(dir); + set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); + } + goto out; out_unmap_put: kunmap(page); f2fs_put_page(page, 0); +out_err: + iput(dir); out: f2fs_msg(inode->i_sb, KERN_NOTICE, "%s: ino = %x, name = %s, dir = %lx, err = %d", @@ -299,10 +307,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, goto out; start = start_bidx_of_node(ofs_of_node(page), fi); - if (IS_INODE(page)) - end = start + ADDRS_PER_INODE(fi); - else - end = start + ADDRS_PER_BLOCK; + end = start + ADDRS_PER_PAGE(page, fi); f2fs_lock_op(sbi); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 085f548be7a3..f25f0e07e26f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -25,7 +25,6 @@ #define __reverse_ffz(x) __reverse_ffs(~(x)) static struct kmem_cache *discard_entry_slab; -static struct kmem_cache *flush_cmd_slab; /* * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since @@ -200,20 +199,20 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) static int issue_flush_thread(void *data) { struct f2fs_sb_info *sbi = data; - struct f2fs_sm_info *sm_i = SM_I(sbi); - wait_queue_head_t *q = &sm_i->flush_wait_queue; + struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; + wait_queue_head_t *q = &fcc->flush_wait_queue; repeat: if (kthread_should_stop()) return 0; - spin_lock(&sm_i->issue_lock); - if (sm_i->issue_list) { - sm_i->dispatch_list = sm_i->issue_list; - sm_i->issue_list = sm_i->issue_tail = NULL; + spin_lock(&fcc->issue_lock); + if (fcc->issue_list) { + fcc->dispatch_list = fcc->issue_list; + fcc->issue_list = fcc->issue_tail = NULL; } - spin_unlock(&sm_i->issue_lock); + spin_unlock(&fcc->issue_lock); - if (sm_i->dispatch_list) { + if (fcc->dispatch_list) { struct bio *bio = bio_alloc(GFP_NOIO, 0); struct flush_cmd *cmd, *next; int ret; @@ -221,47 +220,79 @@ repeat: bio->bi_bdev = sbi->sb->s_bdev; ret = submit_bio_wait(WRITE_FLUSH, bio); - for (cmd = sm_i->dispatch_list; cmd; cmd = next) { + for (cmd = fcc->dispatch_list; cmd; cmd = next) { cmd->ret = ret; next = cmd->next; complete(&cmd->wait); } - sm_i->dispatch_list = NULL; + bio_put(bio); + fcc->dispatch_list = NULL; } - wait_event_interruptible(*q, kthread_should_stop() || sm_i->issue_list); + wait_event_interruptible(*q, + kthread_should_stop() || fcc->issue_list); goto repeat; } int f2fs_issue_flush(struct f2fs_sb_info *sbi) { - struct f2fs_sm_info *sm_i = SM_I(sbi); - struct flush_cmd *cmd; - int ret; + struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; + struct flush_cmd cmd; if (!test_opt(sbi, FLUSH_MERGE)) return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); - cmd = f2fs_kmem_cache_alloc(flush_cmd_slab, GFP_ATOMIC); - cmd->next = NULL; - cmd->ret = 0; - init_completion(&cmd->wait); + init_completion(&cmd.wait); + cmd.next = NULL; - spin_lock(&sm_i->issue_lock); - if (sm_i->issue_list) - sm_i->issue_tail->next = cmd; + spin_lock(&fcc->issue_lock); + if (fcc->issue_list) + fcc->issue_tail->next = &cmd; else - sm_i->issue_list = cmd; - sm_i->issue_tail = cmd; - spin_unlock(&sm_i->issue_lock); + fcc->issue_list = &cmd; + fcc->issue_tail = &cmd; + spin_unlock(&fcc->issue_lock); - if (!sm_i->dispatch_list) - wake_up(&sm_i->flush_wait_queue); + if (!fcc->dispatch_list) + wake_up(&fcc->flush_wait_queue); - wait_for_completion(&cmd->wait); - ret = cmd->ret; - kmem_cache_free(flush_cmd_slab, cmd); - return ret; + wait_for_completion(&cmd.wait); + + return cmd.ret; +} + +int create_flush_cmd_control(struct f2fs_sb_info *sbi) +{ + dev_t dev = sbi->sb->s_bdev->bd_dev; + struct flush_cmd_control *fcc; + int err = 0; + + fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); + if (!fcc) + return -ENOMEM; + spin_lock_init(&fcc->issue_lock); + init_waitqueue_head(&fcc->flush_wait_queue); + fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, + "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); + if (IS_ERR(fcc->f2fs_issue_flush)) { + err = PTR_ERR(fcc->f2fs_issue_flush); + kfree(fcc); + return err; + } + sbi->sm_info->cmd_control_info = fcc; + + return err; +} + +void destroy_flush_cmd_control(struct f2fs_sb_info *sbi) +{ + struct flush_cmd_control *fcc = + sbi->sm_info->cmd_control_info; + + if (fcc && fcc->f2fs_issue_flush) + kthread_stop(fcc->f2fs_issue_flush); + kfree(fcc); + sbi->sm_info->cmd_control_info = NULL; } static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, @@ -336,13 +367,26 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } -static void f2fs_issue_discard(struct f2fs_sb_info *sbi, +static int f2fs_issue_discard(struct f2fs_sb_info *sbi, block_t blkstart, block_t blklen) { sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart); sector_t len = SECTOR_FROM_BLOCK(sbi, blklen); - blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); + return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); +} + +void discard_next_dnode(struct f2fs_sb_info *sbi) +{ + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); + block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + + if (f2fs_issue_discard(sbi, blkaddr, 1)) { + struct page *page = grab_meta_page(sbi, blkaddr); + /* zero-filled page */ + set_page_dirty(page); + f2fs_put_page(page, 1); + } } static void add_discard_addrs(struct f2fs_sb_info *sbi, @@ -1832,7 +1876,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - dev_t dev = sbi->sb->s_bdev->bd_dev; struct f2fs_sm_info *sm_info; int err; @@ -1860,14 +1903,10 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->nr_discards = 0; sm_info->max_discards = 0; - if (test_opt(sbi, FLUSH_MERGE)) { - spin_lock_init(&sm_info->issue_lock); - init_waitqueue_head(&sm_info->flush_wait_queue); - - sm_info->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, - "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); - if (IS_ERR(sm_info->f2fs_issue_flush)) - return PTR_ERR(sm_info->f2fs_issue_flush); + if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { + err = create_flush_cmd_control(sbi); + if (err) + return err; } err = build_sit_info(sbi); @@ -1976,10 +2015,10 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) void destroy_segment_manager(struct f2fs_sb_info *sbi) { struct f2fs_sm_info *sm_info = SM_I(sbi); + if (!sm_info) return; - if (sm_info->f2fs_issue_flush) - kthread_stop(sm_info->f2fs_issue_flush); + destroy_flush_cmd_control(sbi); destroy_dirty_segmap(sbi); destroy_curseg(sbi); destroy_free_segmap(sbi); @@ -1994,17 +2033,10 @@ int __init create_segment_manager_caches(void) sizeof(struct discard_entry)); if (!discard_entry_slab) return -ENOMEM; - flush_cmd_slab = f2fs_kmem_cache_create("flush_command", - sizeof(struct flush_cmd)); - if (!flush_cmd_slab) { - kmem_cache_destroy(discard_entry_slab); - return -ENOMEM; - } return 0; } void destroy_segment_manager_caches(void) { kmem_cache_destroy(discard_entry_slab); - kmem_cache_destroy(flush_cmd_slab); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c756923a7302..b2b18637cb9e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -514,7 +514,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) { struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); - if (!(root->d_sb->s_flags & MS_RDONLY) && test_opt(sbi, BG_GC)) + if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) seq_printf(seq, ",background_gc=%s", "on"); else seq_printf(seq, ",background_gc=%s", "off"); @@ -542,7 +542,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",disable_ext_identify"); if (test_opt(sbi, INLINE_DATA)) seq_puts(seq, ",inline_data"); - if (test_opt(sbi, FLUSH_MERGE)) + if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) seq_puts(seq, ",flush_merge"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); @@ -594,6 +594,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) struct f2fs_sb_info *sbi = F2FS_SB(sb); struct f2fs_mount_info org_mount_opt; int err, active_logs; + bool need_restart_gc = false; + bool need_stop_gc = false; sync_filesystem(sb); @@ -611,7 +613,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) /* * Previous and new state of filesystem is RO, - * so no point in checking GC conditions. + * so skip checking GC and FLUSH_MERGE conditions. */ if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) goto skip; @@ -625,18 +627,40 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) if (sbi->gc_thread) { stop_gc_thread(sbi); f2fs_sync_fs(sb, 1); + need_restart_gc = true; } } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { err = start_gc_thread(sbi); if (err) goto restore_opts; + need_stop_gc = true; + } + + /* + * We stop issue flush thread if FS is mounted as RO + * or if flush_merge is not passed in mount option. + */ + if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { + destroy_flush_cmd_control(sbi); + } else if (test_opt(sbi, FLUSH_MERGE) && + !sbi->sm_info->cmd_control_info) { + err = create_flush_cmd_control(sbi); + if (err) + goto restore_gc; } skip: /* Update the POSIXACL Flag */ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); return 0; - +restore_gc: + if (need_restart_gc) { + if (start_gc_thread(sbi)) + f2fs_msg(sbi->sb, KERN_WARNING, + "background gc thread is stop"); + } else if (need_stop_gc) { + stop_gc_thread(sbi); + } restore_opts: sbi->mount_opt = org_mount_opt; sbi->active_logs = active_logs; diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 503c2451131e..8bea941ee309 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -26,7 +26,7 @@ #include "xattr.h" static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) + size_t list_size, const char *name, size_t len, int type) { struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); int total_len, prefix_len = 0; @@ -53,11 +53,11 @@ static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, return -EINVAL; } - total_len = prefix_len + name_len + 1; + total_len = prefix_len + len + 1; if (list && total_len <= list_size) { memcpy(list, prefix, prefix_len); - memcpy(list + prefix_len, name, name_len); - list[prefix_len + name_len] = '\0'; + memcpy(list + prefix_len, name, len); + list[prefix_len + len] = '\0'; } return total_len; } @@ -108,11 +108,12 @@ static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, if (strcmp(name, "") == 0) return -EINVAL; - return f2fs_setxattr(dentry->d_inode, type, name, value, size, NULL); + return f2fs_setxattr(dentry->d_inode, type, name, + value, size, NULL, flags); } static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) + size_t list_size, const char *name, size_t len, int type) { const char *xname = F2FS_SYSTEM_ADVISE_PREFIX; size_t size; @@ -155,9 +156,6 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, } #ifdef CONFIG_F2FS_FS_SECURITY -static int __f2fs_setxattr(struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, - struct page *ipage); static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, void *page) { @@ -165,9 +163,9 @@ static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, int err = 0; for (xattr = xattr_array; xattr->name != NULL; xattr++) { - err = __f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, + err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, xattr->name, xattr->value, - xattr->value_len, (struct page *)page); + xattr->value_len, (struct page *)page, 0); if (err < 0) break; } @@ -241,26 +239,26 @@ const struct xattr_handler *f2fs_xattr_handlers[] = { NULL, }; -static inline const struct xattr_handler *f2fs_xattr_handler(int name_index) +static inline const struct xattr_handler *f2fs_xattr_handler(int index) { const struct xattr_handler *handler = NULL; - if (name_index > 0 && name_index < ARRAY_SIZE(f2fs_xattr_handler_map)) - handler = f2fs_xattr_handler_map[name_index]; + if (index > 0 && index < ARRAY_SIZE(f2fs_xattr_handler_map)) + handler = f2fs_xattr_handler_map[index]; return handler; } -static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int name_index, - size_t name_len, const char *name) +static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index, + size_t len, const char *name) { struct f2fs_xattr_entry *entry; list_for_each_xattr(entry, base_addr) { - if (entry->e_name_index != name_index) + if (entry->e_name_index != index) continue; - if (entry->e_name_len != name_len) + if (entry->e_name_len != len) continue; - if (!memcmp(entry->e_name, name, name_len)) + if (!memcmp(entry->e_name, name, len)) break; } return entry; @@ -347,6 +345,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, if (ipage) { inline_addr = inline_xattr_addr(ipage); + f2fs_wait_on_page_writeback(ipage, NODE); } else { page = get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) { @@ -354,6 +353,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, return PTR_ERR(page); } inline_addr = inline_xattr_addr(page); + f2fs_wait_on_page_writeback(page, NODE); } memcpy(inline_addr, txattr_addr, inline_size); f2fs_put_page(page, 1); @@ -374,6 +374,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, return PTR_ERR(xpage); } f2fs_bug_on(new_nid); + f2fs_wait_on_page_writeback(xpage, NODE); } else { struct dnode_of_data dn; set_new_dnode(&dn, inode, NULL, NULL, new_nid); @@ -396,42 +397,43 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, return 0; } -int f2fs_getxattr(struct inode *inode, int name_index, const char *name, +int f2fs_getxattr(struct inode *inode, int index, const char *name, void *buffer, size_t buffer_size) { struct f2fs_xattr_entry *entry; void *base_addr; int error = 0; - size_t value_len, name_len; + size_t size, len; if (name == NULL) return -EINVAL; - name_len = strlen(name); - if (name_len > F2FS_NAME_LEN) + + len = strlen(name); + if (len > F2FS_NAME_LEN) return -ERANGE; base_addr = read_all_xattrs(inode, NULL); if (!base_addr) return -ENOMEM; - entry = __find_xattr(base_addr, name_index, name_len, name); + entry = __find_xattr(base_addr, index, len, name); if (IS_XATTR_LAST_ENTRY(entry)) { error = -ENODATA; goto cleanup; } - value_len = le16_to_cpu(entry->e_value_size); + size = le16_to_cpu(entry->e_value_size); - if (buffer && value_len > buffer_size) { + if (buffer && size > buffer_size) { error = -ERANGE; goto cleanup; } if (buffer) { char *pval = entry->e_name + entry->e_name_len; - memcpy(buffer, pval, value_len); + memcpy(buffer, pval, size); } - error = value_len; + error = size; cleanup: kzfree(base_addr); @@ -475,15 +477,15 @@ cleanup: return error; } -static int __f2fs_setxattr(struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, - struct page *ipage) +static int __f2fs_setxattr(struct inode *inode, int index, + const char *name, const void *value, size_t size, + struct page *ipage, int flags) { struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_xattr_entry *here, *last; void *base_addr; int found, newsize; - size_t name_len; + size_t len; __u32 new_hsize; int error = -ENOMEM; @@ -491,11 +493,11 @@ static int __f2fs_setxattr(struct inode *inode, int name_index, return -EINVAL; if (value == NULL) - value_len = 0; + size = 0; - name_len = strlen(name); + len = strlen(name); - if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode)) + if (len > F2FS_NAME_LEN || size > MAX_VALUE_LEN(inode)) return -ERANGE; base_addr = read_all_xattrs(inode, ipage); @@ -503,16 +505,23 @@ static int __f2fs_setxattr(struct inode *inode, int name_index, goto exit; /* find entry with wanted name. */ - here = __find_xattr(base_addr, name_index, name_len, name); + here = __find_xattr(base_addr, index, len, name); found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1; - last = here; + if ((flags & XATTR_REPLACE) && !found) { + error = -ENODATA; + goto exit; + } else if ((flags & XATTR_CREATE) && found) { + error = -EEXIST; + goto exit; + } + + last = here; while (!IS_XATTR_LAST_ENTRY(last)) last = XATTR_NEXT_ENTRY(last); - newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + - name_len + value_len); + newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size); /* 1. Check space */ if (value) { @@ -555,12 +564,12 @@ static int __f2fs_setxattr(struct inode *inode, int name_index, * We just write new entry. */ memset(last, 0, newsize); - last->e_name_index = name_index; - last->e_name_len = name_len; - memcpy(last->e_name, name, name_len); - pval = last->e_name + name_len; - memcpy(pval, value, value_len); - last->e_value_size = cpu_to_le16(value_len); + last->e_name_index = index; + last->e_name_len = len; + memcpy(last->e_name, name, len); + pval = last->e_name + len; + memcpy(pval, value, size); + last->e_value_size = cpu_to_le16(size); new_hsize += newsize; } @@ -583,18 +592,23 @@ exit: return error; } -int f2fs_setxattr(struct inode *inode, int name_index, const char *name, - const void *value, size_t value_len, struct page *ipage) +int f2fs_setxattr(struct inode *inode, int index, const char *name, + const void *value, size_t size, + struct page *ipage, int flags) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); int err; + /* this case is only from init_inode_metadata */ + if (ipage) + return __f2fs_setxattr(inode, index, name, value, + size, ipage, flags); f2fs_balance_fs(sbi); f2fs_lock_op(sbi); /* protect xattr_ver */ down_write(&F2FS_I(inode)->i_sem); - err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage); + err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags); up_write(&F2FS_I(inode)->i_sem); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index b21d9ebdeff3..34ab7dbcf5e3 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -114,18 +114,18 @@ extern const struct xattr_handler f2fs_xattr_security_handler; extern const struct xattr_handler *f2fs_xattr_handlers[]; extern int f2fs_setxattr(struct inode *, int, const char *, - const void *, size_t, struct page *); + const void *, size_t, struct page *, int); extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); #else #define f2fs_xattr_handlers NULL -static inline int f2fs_setxattr(struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len) +static inline int f2fs_setxattr(struct inode *inode, int index, + const char *name, const void *value, size_t size, int flags) { return -EOPNOTSUPP; } -static inline int f2fs_getxattr(struct inode *inode, int name_index, +static inline int f2fs_getxattr(struct inode *inode, int index, const char *name, void *buffer, size_t buffer_size) { return -EOPNOTSUPP; diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 7c31f4bc74a9..e0c4ba39a377 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -52,7 +52,8 @@ struct fat_mount_options { usefree:1, /* Use free_clusters for FAT32 */ tz_set:1, /* Filesystem timestamps' offset set */ rodir:1, /* allow ATTR_RO for directory */ - discard:1; /* Issue discard requests on deletions */ + discard:1, /* Issue discard requests on deletions */ + dos1xfloppy:1; /* Assume default BPB for DOS 1.x floppies */ }; #define FAT_HASH_BITS 8 diff --git a/fs/fat/inode.c b/fs/fat/inode.c index b3361fe2bcb5..9c83594d7fb5 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -35,9 +35,71 @@ #define CONFIG_FAT_DEFAULT_IOCHARSET "" #endif +#define KB_IN_SECTORS 2 + +/* + * A deserialized copy of the on-disk structure laid out in struct + * fat_boot_sector. + */ +struct fat_bios_param_block { + u16 fat_sector_size; + u8 fat_sec_per_clus; + u16 fat_reserved; + u8 fat_fats; + u16 fat_dir_entries; + u16 fat_sectors; + u16 fat_fat_length; + u32 fat_total_sect; + + u8 fat16_state; + u32 fat16_vol_id; + + u32 fat32_length; + u32 fat32_root_cluster; + u16 fat32_info_sector; + u8 fat32_state; + u32 fat32_vol_id; +}; + static int fat_default_codepage = CONFIG_FAT_DEFAULT_CODEPAGE; static char fat_default_iocharset[] = CONFIG_FAT_DEFAULT_IOCHARSET; +static struct fat_floppy_defaults { + unsigned nr_sectors; + unsigned sec_per_clus; + unsigned dir_entries; + unsigned media; + unsigned fat_length; +} floppy_defaults[] = { +{ + .nr_sectors = 160 * KB_IN_SECTORS, + .sec_per_clus = 1, + .dir_entries = 64, + .media = 0xFE, + .fat_length = 1, +}, +{ + .nr_sectors = 180 * KB_IN_SECTORS, + .sec_per_clus = 1, + .dir_entries = 64, + .media = 0xFC, + .fat_length = 2, +}, +{ + .nr_sectors = 320 * KB_IN_SECTORS, + .sec_per_clus = 2, + .dir_entries = 112, + .media = 0xFF, + .fat_length = 1, +}, +{ + .nr_sectors = 360 * KB_IN_SECTORS, + .sec_per_clus = 2, + .dir_entries = 112, + .media = 0xFD, + .fat_length = 2, +}, +}; static int fat_add_cluster(struct inode *inode) { @@ -359,7 +421,7 @@ struct inode *fat_iget(struct super_block *sb, loff_t i_pos) static int is_exec(unsigned char *extension) { - unsigned char *exe_extensions = "EXECOMBAT", *walk; + unsigned char exe_extensions[] = "EXECOMBAT", *walk; for (walk = exe_extensions; *walk; walk += 3) if (!strncmp(extension, walk, 3)) @@ -853,6 +915,8 @@ static int fat_show_options(struct seq_file *m, struct dentry *root) seq_puts(m, ",nfs=stale_rw"); if (opts->discard) seq_puts(m, ",discard"); + if (opts->dos1xfloppy) + seq_puts(m, ",dos1xfloppy"); return 0; } @@ -867,7 +931,7 @@ enum { Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, Opt_obsolete, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont, Opt_err_panic, Opt_err_ro, Opt_discard, Opt_nfs, Opt_time_offset, - Opt_nfs_stale_rw, Opt_nfs_nostale_ro, Opt_err, + Opt_nfs_stale_rw, Opt_nfs_nostale_ro, Opt_err, Opt_dos1xfloppy, }; static const match_table_t fat_tokens = { @@ -900,6 +964,7 @@ static const match_table_t fat_tokens = { {Opt_nfs_stale_rw, "nfs"}, {Opt_nfs_stale_rw, "nfs=stale_rw"}, {Opt_nfs_nostale_ro, "nfs=nostale_ro"}, + {Opt_dos1xfloppy, "dos1xfloppy"}, {Opt_obsolete, "conv=binary"}, {Opt_obsolete, "conv=text"}, {Opt_obsolete, "conv=auto"}, @@ -1102,6 +1167,9 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, case Opt_nfs_nostale_ro: opts->nfs = FAT_NFS_NOSTALE_RO; break; + case Opt_dos1xfloppy: + opts->dos1xfloppy = 1; + break; /* msdos specific */ case Opt_dots: @@ -1247,6 +1315,169 @@ static unsigned long calc_fat_clusters(struct super_block *sb) return sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits; } +static bool fat_bpb_is_zero(struct fat_boot_sector *b) +{ + if (get_unaligned_le16(&b->sector_size)) + return false; + if (b->sec_per_clus) + return false; + if (b->reserved) + return false; + if (b->fats) + return false; + if (get_unaligned_le16(&b->dir_entries)) + return false; + if (get_unaligned_le16(&b->sectors)) + return false; + if (b->media) + return false; + if (b->fat_length) + return false; + if (b->secs_track) + return false; + if (b->heads) + return false; + return true; +} + +static int fat_read_bpb(struct super_block *sb, struct fat_boot_sector *b, + int silent, struct fat_bios_param_block *bpb) +{ + int error = -EINVAL; + + /* Read in BPB ... */ + memset(bpb, 0, sizeof(*bpb)); + bpb->fat_sector_size = get_unaligned_le16(&b->sector_size); + bpb->fat_sec_per_clus = b->sec_per_clus; + bpb->fat_reserved = le16_to_cpu(b->reserved); + bpb->fat_fats = b->fats; + bpb->fat_dir_entries = get_unaligned_le16(&b->dir_entries); + bpb->fat_sectors = get_unaligned_le16(&b->sectors); + bpb->fat_fat_length = le16_to_cpu(b->fat_length); + bpb->fat_total_sect = le32_to_cpu(b->total_sect); + + bpb->fat16_state = b->fat16.state; + bpb->fat16_vol_id = get_unaligned_le32(b->fat16.vol_id); + + bpb->fat32_length = le32_to_cpu(b->fat32.length); + bpb->fat32_root_cluster = le32_to_cpu(b->fat32.root_cluster); + bpb->fat32_info_sector = le16_to_cpu(b->fat32.info_sector); + bpb->fat32_state = b->fat32.state; + bpb->fat32_vol_id = get_unaligned_le32(b->fat32.vol_id); + + /* Validate this looks like a FAT filesystem BPB */ + if (!bpb->fat_reserved) { + if (!silent) + fat_msg(sb, KERN_ERR, + "bogus number of reserved sectors"); + goto out; + } + if (!bpb->fat_fats) { + if (!silent) + fat_msg(sb, KERN_ERR, "bogus number of FAT structure"); + goto out; + } + + /* + * Earlier we checked here that b->secs_track and b->head are nonzero, + * but it turns out valid FAT filesystems can have zero there. + */ + + if (!fat_valid_media(b->media)) { + if (!silent) + fat_msg(sb, KERN_ERR, "invalid media value (0x%02x)", + (unsigned)b->media); + goto out; + } + + if (!is_power_of_2(bpb->fat_sector_size) + || (bpb->fat_sector_size < 512) + || (bpb->fat_sector_size > 4096)) { + if (!silent) + fat_msg(sb, KERN_ERR, "bogus logical sector size %u", + (unsigned)bpb->fat_sector_size); + goto out; + } + + if (!is_power_of_2(bpb->fat_sec_per_clus)) { + if (!silent) + fat_msg(sb, KERN_ERR, "bogus sectors per cluster %u", + (unsigned)bpb->fat_sec_per_clus); + goto out; + } + + error = 0; + +out: + return error; +} + +static int fat_read_static_bpb(struct super_block *sb, + struct fat_boot_sector *b, int silent, + struct fat_bios_param_block *bpb) +{ + static const char *notdos1x = "This doesn't look like a DOS 1.x volume"; + + struct fat_floppy_defaults *fdefaults = NULL; + int error = -EINVAL; + sector_t bd_sects; + unsigned i; + + bd_sects = i_size_read(sb->s_bdev->bd_inode) / SECTOR_SIZE; + + /* 16-bit DOS 1.x reliably wrote bootstrap short-jmp code */ + if (b->ignored[0] != 0xeb || b->ignored[2] != 0x90) { + if (!silent) + fat_msg(sb, KERN_ERR, + "%s; no bootstrapping code", notdos1x); + goto out; + } + + /* + * If any value in this region is non-zero, it isn't archaic + * DOS. + */ + if (!fat_bpb_is_zero(b)) { + if (!silent) + fat_msg(sb, KERN_ERR, + "%s; DOS 2.x BPB is non-zero", notdos1x); + goto out; + } + + for (i = 0; i < ARRAY_SIZE(floppy_defaults); i++) { + if (floppy_defaults[i].nr_sectors == bd_sects) { + fdefaults = &floppy_defaults[i]; + break; + } + } + + if (fdefaults == NULL) { + if (!silent) + fat_msg(sb, KERN_WARNING, + "This looks like a DOS 1.x volume, but isn't a recognized floppy size (%llu sectors)", + (u64)bd_sects); + goto out; + } + + if (!silent) + fat_msg(sb, KERN_INFO, + "This looks like a DOS 1.x volume; assuming default BPB values"); + + memset(bpb, 0, sizeof(*bpb)); + bpb->fat_sector_size = SECTOR_SIZE; + bpb->fat_sec_per_clus = fdefaults->sec_per_clus; + bpb->fat_reserved = 1; + bpb->fat_fats = 2; + bpb->fat_dir_entries = fdefaults->dir_entries; + bpb->fat_sectors = fdefaults->nr_sectors; + bpb->fat_fat_length = fdefaults->fat_length; + + error = 0; + +out: + return error; +} + /* * Read the super block of an MS-DOS FS. */ @@ -1256,12 +1487,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, struct inode *root_inode = NULL, *fat_inode = NULL; struct inode *fsinfo_inode = NULL; struct buffer_head *bh; - struct fat_boot_sector *b; + struct fat_bios_param_block bpb; struct msdos_sb_info *sbi; u16 logical_sector_size; u32 total_sectors, total_clusters, fat_clusters, rootdir_sectors; int debug; - unsigned int media; long error; char buf[50]; @@ -1298,100 +1528,72 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, goto out_fail; } - b = (struct fat_boot_sector *) bh->b_data; - if (!b->reserved) { - if (!silent) - fat_msg(sb, KERN_ERR, "bogus number of reserved sectors"); - brelse(bh); - goto out_invalid; - } - if (!b->fats) { - if (!silent) - fat_msg(sb, KERN_ERR, "bogus number of FAT structure"); - brelse(bh); - goto out_invalid; - } - - /* - * Earlier we checked here that b->secs_track and b->head are nonzero, - * but it turns out valid FAT filesystems can have zero there. - */ + error = fat_read_bpb(sb, (struct fat_boot_sector *)bh->b_data, silent, + &bpb); + if (error == -EINVAL && sbi->options.dos1xfloppy) + error = fat_read_static_bpb(sb, + (struct fat_boot_sector *)bh->b_data, silent, &bpb); + brelse(bh); - media = b->media; - if (!fat_valid_media(media)) { - if (!silent) - fat_msg(sb, KERN_ERR, "invalid media value (0x%02x)", - media); - brelse(bh); - goto out_invalid; - } - logical_sector_size = get_unaligned_le16(&b->sector_size); - if (!is_power_of_2(logical_sector_size) - || (logical_sector_size < 512) - || (logical_sector_size > 4096)) { - if (!silent) - fat_msg(sb, KERN_ERR, "bogus logical sector size %u", - logical_sector_size); - brelse(bh); - goto out_invalid; - } - sbi->sec_per_clus = b->sec_per_clus; - if (!is_power_of_2(sbi->sec_per_clus)) { - if (!silent) - fat_msg(sb, KERN_ERR, "bogus sectors per cluster %u", - sbi->sec_per_clus); - brelse(bh); + if (error == -EINVAL) goto out_invalid; - } + else if (error) + goto out_fail; + + logical_sector_size = bpb.fat_sector_size; + sbi->sec_per_clus = bpb.fat_sec_per_clus; + error = -EIO; if (logical_sector_size < sb->s_blocksize) { fat_msg(sb, KERN_ERR, "logical sector size too small for device" " (logical sector size = %u)", logical_sector_size); - brelse(bh); goto out_fail; } + if (logical_sector_size > sb->s_blocksize) { - brelse(bh); + struct buffer_head *bh_resize; if (!sb_set_blocksize(sb, logical_sector_size)) { fat_msg(sb, KERN_ERR, "unable to set blocksize %u", logical_sector_size); goto out_fail; } - bh = sb_bread(sb, 0); - if (bh == NULL) { + + /* Verify that the larger boot sector is fully readable */ + bh_resize = sb_bread(sb, 0); + if (bh_resize == NULL) { fat_msg(sb, KERN_ERR, "unable to read boot sector" " (logical sector size = %lu)", sb->s_blocksize); goto out_fail; } - b = (struct fat_boot_sector *) bh->b_data; + brelse(bh_resize); } mutex_init(&sbi->s_lock); sbi->cluster_size = sb->s_blocksize * sbi->sec_per_clus; sbi->cluster_bits = ffs(sbi->cluster_size) - 1; - sbi->fats = b->fats; + sbi->fats = bpb.fat_fats; sbi->fat_bits = 0; /* Don't know yet */ - sbi->fat_start = le16_to_cpu(b->reserved); - sbi->fat_length = le16_to_cpu(b->fat_length); + sbi->fat_start = bpb.fat_reserved; + sbi->fat_length = bpb.fat_fat_length; sbi->root_cluster = 0; sbi->free_clusters = -1; /* Don't know yet */ sbi->free_clus_valid = 0; sbi->prev_free = FAT_START_ENT; sb->s_maxbytes = 0xffffffff; - if (!sbi->fat_length && b->fat32.length) { + if (!sbi->fat_length && bpb.fat32_length) { struct fat_boot_fsinfo *fsinfo; struct buffer_head *fsinfo_bh; /* Must be FAT32 */ sbi->fat_bits = 32; - sbi->fat_length = le32_to_cpu(b->fat32.length); - sbi->root_cluster = le32_to_cpu(b->fat32.root_cluster); + sbi->fat_length = bpb.fat32_length; + sbi->root_cluster = bpb.fat32_root_cluster; /* MC - if info_sector is 0, don't multiply by 0 */ - sbi->fsinfo_sector = le16_to_cpu(b->fat32.info_sector); + sbi->fsinfo_sector = bpb.fat32_info_sector; if (sbi->fsinfo_sector == 0) sbi->fsinfo_sector = 1; @@ -1399,7 +1601,6 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, if (fsinfo_bh == NULL) { fat_msg(sb, KERN_ERR, "bread failed, FSINFO block" " (sector = %lu)", sbi->fsinfo_sector); - brelse(bh); goto out_fail; } @@ -1422,35 +1623,28 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, /* interpret volume ID as a little endian 32 bit integer */ if (sbi->fat_bits == 32) - sbi->vol_id = (((u32)b->fat32.vol_id[0]) | - ((u32)b->fat32.vol_id[1] << 8) | - ((u32)b->fat32.vol_id[2] << 16) | - ((u32)b->fat32.vol_id[3] << 24)); + sbi->vol_id = bpb.fat32_vol_id; else /* fat 16 or 12 */ - sbi->vol_id = (((u32)b->fat16.vol_id[0]) | - ((u32)b->fat16.vol_id[1] << 8) | - ((u32)b->fat16.vol_id[2] << 16) | - ((u32)b->fat16.vol_id[3] << 24)); + sbi->vol_id = bpb.fat16_vol_id; sbi->dir_per_block = sb->s_blocksize / sizeof(struct msdos_dir_entry); sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1; sbi->dir_start = sbi->fat_start + sbi->fats * sbi->fat_length; - sbi->dir_entries = get_unaligned_le16(&b->dir_entries); + sbi->dir_entries = bpb.fat_dir_entries; if (sbi->dir_entries & (sbi->dir_per_block - 1)) { if (!silent) fat_msg(sb, KERN_ERR, "bogus directory-entries per block" " (%u)", sbi->dir_entries); - brelse(bh); goto out_invalid; } rootdir_sectors = sbi->dir_entries * sizeof(struct msdos_dir_entry) / sb->s_blocksize; sbi->data_start = sbi->dir_start + rootdir_sectors; - total_sectors = get_unaligned_le16(&b->sectors); + total_sectors = bpb.fat_sectors; if (total_sectors == 0) - total_sectors = le32_to_cpu(b->total_sect); + total_sectors = bpb.fat_total_sect; total_clusters = (total_sectors - sbi->data_start) / sbi->sec_per_clus; @@ -1459,9 +1653,9 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, /* some OSes set FAT_STATE_DIRTY and clean it on unmount. */ if (sbi->fat_bits == 32) - sbi->dirty = b->fat32.state & FAT_STATE_DIRTY; + sbi->dirty = bpb.fat32_state & FAT_STATE_DIRTY; else /* fat 16 or 12 */ - sbi->dirty = b->fat16.state & FAT_STATE_DIRTY; + sbi->dirty = bpb.fat16_state & FAT_STATE_DIRTY; /* check that FAT table does not overflow */ fat_clusters = calc_fat_clusters(sb); @@ -1470,7 +1664,6 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, if (!silent) fat_msg(sb, KERN_ERR, "count of clusters too big (%u)", total_clusters); - brelse(bh); goto out_invalid; } @@ -1483,8 +1676,6 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, if (sbi->prev_free < FAT_START_ENT) sbi->prev_free = FAT_START_ENT; - brelse(bh); - /* set up enough so that it can read an inode */ fat_hash_init(sb); dir_hash_init(sb); diff --git a/fs/file_table.c b/fs/file_table.c index a374f5033e97..40bf4660f0a3 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -76,14 +76,14 @@ EXPORT_SYMBOL_GPL(get_max_files); * Handle nr_files sysctl */ #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) -int proc_nr_files(ctl_table *table, int write, +int proc_nr_files(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { files_stat.nr_files = get_nr_files(); return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } #else -int proc_nr_files(ctl_table *table, int write, +int proc_nr_files(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index f7cff367db7f..56cce7fdd39e 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -280,15 +280,15 @@ int fscache_add_cache(struct fscache_cache *cache, spin_unlock(&fscache_fsdef_index.lock); up_write(&fscache_addremove_sem); - printk(KERN_NOTICE "FS-Cache: Cache \"%s\" added (type %s)\n", - cache->tag->name, cache->ops->name); + pr_notice("Cache \"%s\" added (type %s)\n", + cache->tag->name, cache->ops->name); kobject_uevent(cache->kobj, KOBJ_ADD); _leave(" = 0 [%s]", cache->identifier); return 0; tag_in_use: - printk(KERN_ERR "FS-Cache: Cache tag '%s' already in use\n", tagname); + pr_err("Cache tag '%s' already in use\n", tagname); __fscache_release_cache_tag(tag); _leave(" = -EXIST"); return -EEXIST; @@ -317,8 +317,7 @@ EXPORT_SYMBOL(fscache_add_cache); void fscache_io_error(struct fscache_cache *cache) { if (!test_and_set_bit(FSCACHE_IOERROR, &cache->flags)) - printk(KERN_ERR "FS-Cache:" - " Cache '%s' stopped due to I/O error\n", + pr_err("Cache '%s' stopped due to I/O error\n", cache->ops->name); } EXPORT_SYMBOL(fscache_io_error); @@ -369,8 +368,8 @@ void fscache_withdraw_cache(struct fscache_cache *cache) _enter(""); - printk(KERN_NOTICE "FS-Cache: Withdrawing cache \"%s\"\n", - cache->tag->name); + pr_notice("Withdrawing cache \"%s\"\n", + cache->tag->name); /* make the cache unavailable for cookie acquisition */ if (test_and_set_bit(FSCACHE_CACHE_WITHDRAWN, &cache->flags)) diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 29d7feb62cf7..aec01be91b0a 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -519,7 +519,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate) ASSERTCMP(atomic_read(&cookie->n_active), >, 0); if (atomic_read(&cookie->n_children) != 0) { - printk(KERN_ERR "FS-Cache: Cookie '%s' still has children\n", + pr_err("Cookie '%s' still has children\n", cookie->def->name); BUG(); } diff --git a/fs/fscache/histogram.c b/fs/fscache/histogram.c index bad496748a59..7d637e2335fd 100644 --- a/fs/fscache/histogram.c +++ b/fs/fscache/histogram.c @@ -31,12 +31,10 @@ static int fscache_histogram_show(struct seq_file *m, void *v) switch ((unsigned long) v) { case 1: - seq_puts(m, "JIFS SECS OBJ INST OP RUNS OBJ RUNS " - " RETRV DLY RETRIEVLS\n"); + seq_puts(m, "JIFS SECS OBJ INST OP RUNS OBJ RUNS RETRV DLY RETRIEVLS\n"); return 0; case 2: - seq_puts(m, "===== ===== ========= ========= =========" - " ========= =========\n"); + seq_puts(m, "===== ===== ========= ========= ========= ========= =========\n"); return 0; default: index = (unsigned long) v - 3; diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 4226f6680b06..bc6c08fcfddd 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -22,6 +22,12 @@ * */ +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) "FS-Cache: " fmt + #include <linux/fscache-cache.h> #include <linux/sched.h> @@ -413,8 +419,8 @@ do { \ #define ASSERT(X) \ do { \ if (unlikely(!(X))) { \ - printk(KERN_ERR "\n"); \ - printk(KERN_ERR "FS-Cache: Assertion failed\n"); \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ BUG(); \ } \ } while (0) @@ -422,9 +428,9 @@ do { \ #define ASSERTCMP(X, OP, Y) \ do { \ if (unlikely(!((X) OP (Y)))) { \ - printk(KERN_ERR "\n"); \ - printk(KERN_ERR "FS-Cache: Assertion failed\n"); \ - printk(KERN_ERR "%lx " #OP " %lx is false\n", \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ + pr_err("%lx " #OP " %lx is false\n", \ (unsigned long)(X), (unsigned long)(Y)); \ BUG(); \ } \ @@ -433,8 +439,8 @@ do { \ #define ASSERTIF(C, X) \ do { \ if (unlikely((C) && !(X))) { \ - printk(KERN_ERR "\n"); \ - printk(KERN_ERR "FS-Cache: Assertion failed\n"); \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ BUG(); \ } \ } while (0) @@ -442,9 +448,9 @@ do { \ #define ASSERTIFCMP(C, X, OP, Y) \ do { \ if (unlikely((C) && !((X) OP (Y)))) { \ - printk(KERN_ERR "\n"); \ - printk(KERN_ERR "FS-Cache: Assertion failed\n"); \ - printk(KERN_ERR "%lx " #OP " %lx is false\n", \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ + pr_err("%lx " #OP " %lx is false\n", \ (unsigned long)(X), (unsigned long)(Y)); \ BUG(); \ } \ diff --git a/fs/fscache/main.c b/fs/fscache/main.c index 7c27907e650c..63f868e869b9 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -67,7 +67,7 @@ static int fscache_max_active_sysctl(struct ctl_table *table, int write, return ret; } -ctl_table fscache_sysctls[] = { +struct ctl_table fscache_sysctls[] = { { .procname = "object_max_active", .data = &fscache_object_max_active, @@ -87,7 +87,7 @@ ctl_table fscache_sysctls[] = { {} }; -ctl_table fscache_sysctls_root[] = { +struct ctl_table fscache_sysctls_root[] = { { .procname = "fscache", .mode = 0555, @@ -146,8 +146,7 @@ static int __init fscache_init(void) 0, fscache_cookie_init_once); if (!fscache_cookie_jar) { - printk(KERN_NOTICE - "FS-Cache: Failed to allocate a cookie jar\n"); + pr_notice("Failed to allocate a cookie jar\n"); ret = -ENOMEM; goto error_cookie_jar; } @@ -156,7 +155,7 @@ static int __init fscache_init(void) if (!fscache_root) goto error_kobj; - printk(KERN_NOTICE "FS-Cache: Loaded\n"); + pr_notice("Loaded\n"); return 0; error_kobj: @@ -192,7 +191,7 @@ static void __exit fscache_exit(void) fscache_proc_cleanup(); destroy_workqueue(fscache_op_wq); destroy_workqueue(fscache_object_wq); - printk(KERN_NOTICE "FS-Cache: Unloaded\n"); + pr_notice("Unloaded\n"); } module_exit(fscache_exit); diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c index 989f39401547..6d941f56faf4 100644 --- a/fs/fscache/netfs.c +++ b/fs/fscache/netfs.c @@ -65,8 +65,7 @@ int __fscache_register_netfs(struct fscache_netfs *netfs) list_add(&netfs->link, &fscache_netfs_list); ret = 0; - printk(KERN_NOTICE "FS-Cache: Netfs '%s' registered for caching\n", - netfs->name); + pr_notice("Netfs '%s' registered for caching\n", netfs->name); already_registered: up_write(&fscache_addremove_sem); @@ -97,8 +96,8 @@ void __fscache_unregister_netfs(struct fscache_netfs *netfs) up_write(&fscache_addremove_sem); - printk(KERN_NOTICE "FS-Cache: Netfs '%s' unregistered from caching\n", - netfs->name); + pr_notice("Netfs '%s' unregistered from caching\n", + netfs->name); _leave(""); } diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index b5ebc2d7d80d..b8179ca6bf9d 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -285,20 +285,20 @@ static int fscache_objlist_show(struct seq_file *m, void *v) fscache_unuse_cookie(obj); if (keylen > 0 || auxlen > 0) { - seq_printf(m, " "); + seq_puts(m, " "); for (p = buf; keylen > 0; keylen--) seq_printf(m, "%02x", *p++); if (auxlen > 0) { if (config & FSCACHE_OBJLIST_CONFIG_KEY) - seq_printf(m, ", "); + seq_puts(m, ", "); for (; auxlen > 0; auxlen--) seq_printf(m, "%02x", *p++); } } - seq_printf(m, "\n"); + seq_puts(m, "\n"); } else { - seq_printf(m, "<no_netfs>\n"); + seq_puts(m, "<no_netfs>\n"); } return 0; } diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 318071aca217..e7b87a0e5185 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -51,8 +51,7 @@ void fscache_enqueue_operation(struct fscache_operation *op) _debug("queue for caller's attention"); break; default: - printk(KERN_ERR "FS-Cache: Unexpected op type %lx", - op->flags); + pr_err("Unexpected op type %lx", op->flags); BUG(); break; } diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 7f5c658af755..ed70714503fa 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -1108,10 +1108,8 @@ void fscache_mark_page_cached(struct fscache_retrieval *op, struct page *page) static bool once_only; if (!once_only) { once_only = true; - printk(KERN_WARNING "FS-Cache:" - " Cookie type %s marked page %lx" - " multiple times\n", - cookie->def->name, page->index); + pr_warn("Cookie type %s marked page %lx multiple times\n", + cookie->def->name, page->index); } } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index aac71ce373e4..098f97bdcf1b 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1614,7 +1614,7 @@ out_finish: static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req) { - release_pages(req->pages, req->num_pages, 0); + release_pages(req->pages, req->num_pages, false); } static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 96d513e01a5d..903cbc9cd6bd 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1089,8 +1089,6 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes); flush_dcache_page(page); - mark_page_accessed(page); - if (!tmp) { unlock_page(page); page_cache_release(page); @@ -2304,7 +2302,6 @@ static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl) struct fuse_file *ff = file->private_data; /* emulate flock with POSIX locks */ - fl->fl_owner = (fl_owner_t) file; ff->flock = true; err = fuse_setlk(file, fl, 1); } diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index ce62dcac90b6..492123cda64a 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -431,7 +431,7 @@ static int gfs2_jdata_writepages(struct address_space *mapping, ret = gfs2_write_cache_jdata(mapping, wbc); if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) { - gfs2_log_flush(sdp, ip->i_gl); + gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH); ret = gfs2_write_cache_jdata(mapping, wbc); } return ret; @@ -577,7 +577,6 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, p = kmap_atomic(page); memcpy(buf + copied, p + offset, amt); kunmap_atomic(p); - mark_page_accessed(page); page_cache_release(page); copied += amt; index++; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index c62d4b9f51dc..e6ee5b6e8d99 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -707,7 +707,7 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi * @top: The first pointer in the buffer * @bottom: One more than the last pointer * @height: the height this buffer is at - * @data: a pointer to a struct strip_mine + * @sm: a pointer to a struct strip_mine * * Returns: errno */ @@ -992,6 +992,8 @@ unlock: return err; } +#define GFS2_JTRUNC_REVOKES 8192 + /** * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files * @inode: The inode being truncated @@ -1003,8 +1005,6 @@ unlock: * if the number of pages being truncated gets too large. */ -#define GFS2_JTRUNC_REVOKES 8192 - static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize) { struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -1348,7 +1348,7 @@ void gfs2_free_journal_extents(struct gfs2_jdesc *jd) * gfs2_add_jextent - Add or merge a new extent to extent cache * @jd: The journal descriptor * @lblock: The logical block at start of new extent - * @pblock: The physical block at start of new extent + * @dblock: The physical block at start of new extent * @blocks: Size of extent in fs blocks * * Returns: 0 on success or -ENOMEM diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 80d67253623c..6ab0cfb2e891 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -203,9 +203,9 @@ void gfs2_set_inode_flags(struct inode *inode) GFS2_DIF_INHERIT_JDATA) /** - * gfs2_set_flags - set flags on an inode - * @inode: The inode - * @flags: The flags to set + * do_gfs2_set_flags - set flags on an inode + * @filp: file pointer + * @reqflags: The flags to set * @mask: Indicates which flags are valid * */ @@ -256,7 +256,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) } if ((flags ^ new_flags) & GFS2_DIF_JDATA) { if (flags & GFS2_DIF_JDATA) - gfs2_log_flush(sdp, ip->i_gl); + gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH); error = filemap_fdatawrite(inode->i_mapping); if (error) goto out; @@ -318,7 +318,7 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) /** * gfs2_size_hint - Give a hint to the size of a write request - * @file: The struct file + * @filep: The struct file * @offset: The file offset of the write * @size: The length of the write * @@ -371,7 +371,7 @@ static int gfs2_allocate_page_backing(struct page *page) /** * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable * @vma: The virtual memory area - * @page: The page which is about to become writable + * @vmf: The virtual memory fault containing the page to become writable * * When the page becomes writable, we need to ensure that we have * blocks allocated on disk to back that page. diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index aec7f73832f0..c355f7320e44 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -277,7 +277,7 @@ static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holde static void gfs2_holder_wake(struct gfs2_holder *gh) { clear_bit(HIF_WAIT, &gh->gh_iflags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&gh->gh_iflags, HIF_WAIT); } @@ -411,7 +411,7 @@ static void gfs2_demote_wake(struct gfs2_glock *gl) { gl->gl_demote_state = LM_ST_EXCLUSIVE; clear_bit(GLF_DEMOTE, &gl->gl_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&gl->gl_flags, GLF_DEMOTE); } @@ -620,7 +620,7 @@ out: out_sched: clear_bit(GLF_LOCK, &gl->gl_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); gl->gl_lockref.count++; if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gl->gl_lockref.count--; @@ -628,7 +628,7 @@ out_sched: out_unlock: clear_bit(GLF_LOCK, &gl->gl_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); return; } diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 54b66809e818..fc1100781bbc 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -89,18 +89,23 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) if (!tr.tr_revokes) return; - /* A shortened, inline version of gfs2_trans_begin() */ + /* A shortened, inline version of gfs2_trans_begin() + * tr->alloced is not set since the transaction structure is + * on the stack */ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); tr.tr_ip = (unsigned long)__builtin_return_address(0); sb_start_intwrite(sdp->sd_vfs); - gfs2_log_reserve(sdp, tr.tr_reserved); + if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0) { + sb_end_intwrite(sdp->sd_vfs); + return; + } WARN_ON_ONCE(current->journal_info); current->journal_info = &tr; __gfs2_ail_flush(gl, 0, tr.tr_revokes); gfs2_trans_end(sdp); - gfs2_log_flush(sdp, NULL); + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); } void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) @@ -121,7 +126,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) return; __gfs2_ail_flush(gl, fsync, max_revokes); gfs2_trans_end(sdp); - gfs2_log_flush(sdp, NULL); + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); } /** @@ -144,7 +149,7 @@ static void rgrp_go_sync(struct gfs2_glock *gl) return; GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); - gfs2_log_flush(sdp, gl); + gfs2_log_flush(sdp, gl, NORMAL_FLUSH); filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end); error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end); mapping_set_error(mapping, error); @@ -206,7 +211,7 @@ static void inode_go_sync(struct gfs2_glock *gl) GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); - gfs2_log_flush(gl->gl_sbd, gl); + gfs2_log_flush(gl->gl_sbd, gl, NORMAL_FLUSH); filemap_fdatawrite(metamapping); if (ip) { struct address_space *mapping = ip->i_inode.i_mapping; @@ -221,7 +226,7 @@ static void inode_go_sync(struct gfs2_glock *gl) * Writeback of the data mapping may cause the dirty flag to be set * so we have to clear it again here. */ - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(GLF_DIRTY, &gl->gl_flags); } @@ -253,7 +258,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) } if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) { - gfs2_log_flush(gl->gl_sbd, NULL); + gfs2_log_flush(gl->gl_sbd, NULL, NORMAL_FLUSH); gl->gl_sbd->sd_rindex_uptodate = 0; } if (ip && S_ISREG(ip->i_inode.i_mode)) @@ -455,31 +460,39 @@ static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) } /** - * trans_go_sync - promote/demote the transaction glock + * freeze_go_sync - promote/demote the freeze glock * @gl: the glock * @state: the requested state * @flags: * */ -static void trans_go_sync(struct gfs2_glock *gl) +static void freeze_go_sync(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; + DEFINE_WAIT(wait); - if (gl->gl_state != LM_ST_UNLOCKED && + if (gl->gl_state == LM_ST_SHARED && test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { - gfs2_meta_syncfs(sdp); - gfs2_log_shutdown(sdp); + atomic_set(&sdp->sd_log_freeze, 1); + wake_up(&sdp->sd_logd_waitq); + do { + prepare_to_wait(&sdp->sd_log_frozen_wait, &wait, + TASK_UNINTERRUPTIBLE); + if (atomic_read(&sdp->sd_log_freeze)) + io_schedule(); + } while(atomic_read(&sdp->sd_log_freeze)); + finish_wait(&sdp->sd_log_frozen_wait, &wait); } } /** - * trans_go_xmote_bh - After promoting/demoting the transaction glock + * freeze_go_xmote_bh - After promoting/demoting the freeze glock * @gl: the glock * */ -static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) +static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) { struct gfs2_sbd *sdp = gl->gl_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); @@ -512,7 +525,7 @@ static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) * Always returns 0 */ -static int trans_go_demote_ok(const struct gfs2_glock *gl) +static int freeze_go_demote_ok(const struct gfs2_glock *gl) { return 0; } @@ -563,10 +576,10 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_flags = GLOF_LVB, }; -const struct gfs2_glock_operations gfs2_trans_glops = { - .go_sync = trans_go_sync, - .go_xmote_bh = trans_go_xmote_bh, - .go_demote_ok = trans_go_demote_ok, +const struct gfs2_glock_operations gfs2_freeze_glops = { + .go_sync = freeze_go_sync, + .go_xmote_bh = freeze_go_xmote_bh, + .go_demote_ok = freeze_go_demote_ok, .go_type = LM_TYPE_NONDISK, }; diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h index bf95a2dc1662..7455d2629bcb 100644 --- a/fs/gfs2/glops.h +++ b/fs/gfs2/glops.h @@ -15,7 +15,7 @@ extern const struct gfs2_glock_operations gfs2_meta_glops; extern const struct gfs2_glock_operations gfs2_inode_glops; extern const struct gfs2_glock_operations gfs2_rgrp_glops; -extern const struct gfs2_glock_operations gfs2_trans_glops; +extern const struct gfs2_glock_operations gfs2_freeze_glops; extern const struct gfs2_glock_operations gfs2_iopen_glops; extern const struct gfs2_glock_operations gfs2_flock_glops; extern const struct gfs2_glock_operations gfs2_nondisk_glops; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index bdf70c18610c..67d310c9ada3 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -465,9 +465,7 @@ struct gfs2_trans { unsigned int tr_reserved; unsigned int tr_touched:1; unsigned int tr_attached:1; - - struct gfs2_holder tr_t_gh; - + unsigned int tr_alloced:1; unsigned int tr_num_buf_new; unsigned int tr_num_databuf_new; @@ -682,7 +680,7 @@ struct gfs2_sbd { struct lm_lockstruct sd_lockstruct; struct gfs2_holder sd_live_gh; struct gfs2_glock *sd_rename_gl; - struct gfs2_glock *sd_trans_gl; + struct gfs2_glock *sd_freeze_gl; wait_queue_head_t sd_glock_wait; atomic_t sd_glock_disposal; struct completion sd_locking_init; @@ -730,6 +728,8 @@ struct gfs2_sbd { struct gfs2_holder sd_sc_gh; struct gfs2_holder sd_qc_gh; + struct completion sd_journal_ready; + /* Daemon stuff */ struct task_struct *sd_logd_process; @@ -794,6 +794,12 @@ struct gfs2_sbd { /* For quiescing the filesystem */ struct gfs2_holder sd_freeze_gh; + struct gfs2_holder sd_freeze_root_gh; + struct gfs2_holder sd_thaw_gh; + atomic_t sd_log_freeze; + atomic_t sd_frozen_root; + wait_queue_head_t sd_frozen_root_wait; + wait_queue_head_t sd_log_frozen_wait; char sd_fsname[GFS2_FSNAME_LEN]; char sd_table_name[GFS2_FSNAME_LEN]; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 28cc7bf6575a..e62e59477884 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1613,18 +1613,26 @@ int gfs2_permission(struct inode *inode, int mask) { struct gfs2_inode *ip; struct gfs2_holder i_gh; + struct gfs2_sbd *sdp = GFS2_SB(inode); int error; int unlock = 0; + int frozen_root = 0; ip = GFS2_I(inode); if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { - if (mask & MAY_NOT_BLOCK) - return -ECHILD; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); - if (error) - return error; - unlock = 1; + if (unlikely(gfs2_glock_is_held_excl(sdp->sd_freeze_gl) && + inode == sdp->sd_root_dir->d_inode && + atomic_inc_not_zero(&sdp->sd_frozen_root))) + frozen_root = 1; + else { + if (mask & MAY_NOT_BLOCK) + return -ECHILD; + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + if (error) + return error; + unlock = 1; + } } if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) @@ -1633,6 +1641,8 @@ int gfs2_permission(struct inode *inode, int mask) error = generic_permission(inode, mask); if (unlock) gfs2_glock_dq_uninit(&i_gh); + else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root)) + wake_up(&sdp->sd_frozen_root_wait); return error; } @@ -1805,19 +1815,29 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, struct inode *inode = dentry->d_inode; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; + struct gfs2_sbd *sdp = GFS2_SB(inode); int error; int unlock = 0; + int frozen_root = 0; if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); - if (error) - return error; - unlock = 1; + if (unlikely(gfs2_glock_is_held_excl(sdp->sd_freeze_gl) && + inode == sdp->sd_root_dir->d_inode && + atomic_inc_not_zero(&sdp->sd_frozen_root))) + frozen_root = 1; + else { + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); + if (error) + return error; + unlock = 1; + } } generic_fillattr(inode, stat); if (unlock) gfs2_glock_dq_uninit(&gh); + else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root)) + wake_up(&sdp->sd_frozen_root_wait); return 0; } diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index c1eb555dc588..91f274de1246 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -1134,7 +1134,7 @@ static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots, queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0); clear_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY); spin_unlock(&ls->ls_recover_spin); } @@ -1271,7 +1271,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table) ls->ls_first = !!test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags); clear_bit(SDF_NOJOURNALID, &sdp->sd_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); return 0; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 4a14d504ef83..3966fadbcebd 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -301,6 +301,23 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) } /** + * gfs2_log_release - Release a given number of log blocks + * @sdp: The GFS2 superblock + * @blks: The number of blocks + * + */ + +void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) +{ + + atomic_add(blks, &sdp->sd_log_blks_free); + trace_gfs2_log_blocks(sdp, blks); + gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= + sdp->sd_jdesc->jd_blocks); + up_read(&sdp->sd_log_flush_lock); +} + +/** * gfs2_log_reserve - Make a log reservation * @sdp: The GFS2 superblock * @blks: The number of blocks to reserve @@ -358,7 +375,10 @@ retry: wake_up(&sdp->sd_log_waitq); down_read(&sdp->sd_log_flush_lock); - + if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { + gfs2_log_release(sdp, blks); + return -EROFS; + } return 0; } @@ -671,7 +691,8 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) * */ -void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) +void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, + enum gfs2_flush_type type) { struct gfs2_trans *tr; @@ -723,6 +744,42 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) } spin_unlock(&sdp->sd_ail_lock); gfs2_log_unlock(sdp); + + if (atomic_read(&sdp->sd_log_freeze)) + type = FREEZE_FLUSH; + if (type != NORMAL_FLUSH) { + if (!sdp->sd_log_idle) { + for (;;) { + gfs2_ail1_start(sdp); + gfs2_ail1_wait(sdp); + if (gfs2_ail1_empty(sdp)) + break; + } + atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ + trace_gfs2_log_blocks(sdp, -1); + sdp->sd_log_flush_wrapped = 0; + log_write_header(sdp, 0); + sdp->sd_log_head = sdp->sd_log_flush_head; + } + if (type == SHUTDOWN_FLUSH || type == FREEZE_FLUSH) + gfs2_log_shutdown(sdp); + if (type == FREEZE_FLUSH) { + int error; + + atomic_set(&sdp->sd_log_freeze, 0); + wake_up(&sdp->sd_log_frozen_wait); + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, + LM_ST_SHARED, 0, + &sdp->sd_thaw_gh); + if (error) { + printk(KERN_INFO "GFS2: couln't get freeze lock : %d\n", error); + gfs2_assert_withdraw(sdp, 0); + } + else + gfs2_glock_dq_uninit(&sdp->sd_thaw_gh); + } + } + trace_gfs2_log_flush(sdp, 0); up_write(&sdp->sd_log_flush_lock); @@ -761,7 +818,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) if (sdp->sd_log_tr) { gfs2_merge_trans(sdp->sd_log_tr, tr); } else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) { - gfs2_assert_withdraw(sdp, tr->tr_t_gh.gh_gl); + gfs2_assert_withdraw(sdp, tr->tr_alloced); sdp->sd_log_tr = tr; tr->tr_attached = 1; } @@ -813,8 +870,6 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) void gfs2_log_shutdown(struct gfs2_sbd *sdp) { - down_write(&sdp->sd_log_flush_lock); - gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list)); @@ -824,38 +879,16 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT); - gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list)); sdp->sd_log_head = sdp->sd_log_flush_head; sdp->sd_log_tail = sdp->sd_log_head; - - up_write(&sdp->sd_log_flush_lock); -} - - -/** - * gfs2_meta_syncfs - sync all the buffers in a filesystem - * @sdp: the filesystem - * - */ - -void gfs2_meta_syncfs(struct gfs2_sbd *sdp) -{ - gfs2_log_flush(sdp, NULL); - for (;;) { - gfs2_ail1_start(sdp); - gfs2_ail1_wait(sdp); - if (gfs2_ail1_empty(sdp)) - break; - } - gfs2_log_flush(sdp, NULL); } static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) { - return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1)); + return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1) || atomic_read(&sdp->sd_log_freeze)); } static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp) @@ -882,14 +915,14 @@ int gfs2_logd(void *data) if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { gfs2_ail1_empty(sdp); - gfs2_log_flush(sdp, NULL); + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); } if (gfs2_ail_flush_reqd(sdp)) { gfs2_ail1_start(sdp); gfs2_ail1_wait(sdp); gfs2_ail1_empty(sdp); - gfs2_log_flush(sdp, NULL); + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); } if (!gfs2_ail_flush_reqd(sdp)) diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 37216634f0aa..9499a6049212 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -63,14 +63,21 @@ extern void gfs2_ordered_del_inode(struct gfs2_inode *ip); extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, unsigned int ssize); +extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); -extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); +enum gfs2_flush_type { + NORMAL_FLUSH = 0, + SYNC_FLUSH, + SHUTDOWN_FLUSH, + FREEZE_FLUSH +}; +extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, + enum gfs2_flush_type type); extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc); extern void gfs2_log_shutdown(struct gfs2_sbd *sdp); -extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp); extern int gfs2_logd(void *data); extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); extern void gfs2_write_revokes(struct gfs2_sbd *sdp); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index a294d8d8bcd4..2c1ae861dc94 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -75,7 +75,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd) unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number; struct gfs2_bitmap *bi = rgd->rd_bits + index; - if (bi->bi_clone == 0) + if (bi->bi_clone == NULL) return; if (sdp->sd_args.ar_discard) gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 2cf09b63a6b4..b984a6e190bc 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -136,7 +136,8 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) yield(); } } else { - page = find_lock_page(mapping, index); + page = find_get_page_flags(mapping, index, + FGP_LOCK|FGP_ACCESSED); if (!page) return NULL; } @@ -153,7 +154,6 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) map_bh(bh, sdp->sd_vfs, blkno); unlock_page(page); - mark_page_accessed(page); page_cache_release(page); return bh; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 22f954051bb8..bc564c0d6d16 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -94,6 +94,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) INIT_LIST_HEAD(&sdp->sd_jindex_list); spin_lock_init(&sdp->sd_jindex_spin); mutex_init(&sdp->sd_jindex_mutex); + init_completion(&sdp->sd_journal_ready); INIT_LIST_HEAD(&sdp->sd_quota_list); mutex_init(&sdp->sd_quota_mutex); @@ -129,6 +130,10 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) init_rwsem(&sdp->sd_log_flush_lock); atomic_set(&sdp->sd_log_in_flight, 0); init_waitqueue_head(&sdp->sd_log_flush_wait); + init_waitqueue_head(&sdp->sd_log_frozen_wait); + atomic_set(&sdp->sd_log_freeze, 0); + atomic_set(&sdp->sd_frozen_root, 0); + init_waitqueue_head(&sdp->sd_frozen_root_wait); return sdp; } @@ -419,8 +424,8 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh, goto fail_live; } - error = gfs2_glock_get(sdp, GFS2_TRANS_LOCK, &gfs2_trans_glops, - CREATE, &sdp->sd_trans_gl); + error = gfs2_glock_get(sdp, GFS2_FREEZE_LOCK, &gfs2_freeze_glops, + CREATE, &sdp->sd_freeze_gl); if (error) { fs_err(sdp, "can't create transaction glock: %d\n", error); goto fail_rename; @@ -429,7 +434,7 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh, return 0; fail_trans: - gfs2_glock_put(sdp->sd_trans_gl); + gfs2_glock_put(sdp->sd_freeze_gl); fail_rename: gfs2_glock_put(sdp->sd_rename_gl); fail_live: @@ -755,7 +760,15 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags); gfs2_glock_dq_uninit(&ji_gh); jindex = 0; - + if (!sdp->sd_args.ar_spectator) { + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0, + &sdp->sd_thaw_gh); + if (error) { + fs_err(sdp, "can't acquire freeze glock: %d\n", error); + goto fail_jinode_gh; + } + } + gfs2_glock_dq_uninit(&sdp->sd_thaw_gh); return 0; fail_jinode_gh: @@ -784,6 +797,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) goto fail_qinode; error = init_journal(sdp, undo); + complete_all(&sdp->sd_journal_ready); if (error) goto fail; @@ -1200,6 +1214,7 @@ fail_sb: fail_locking: init_locking(sdp, &mount_gh, UNDO); fail_lm: + complete_all(&sdp->sd_journal_ready); gfs2_gl_hash_clear(sdp); gfs2_lm_unmount(sdp); fail_debug: @@ -1380,7 +1395,7 @@ static void gfs2_kill_sb(struct super_block *sb) return; } - gfs2_meta_syncfs(sdp); + gfs2_log_flush(sdp, NULL, SYNC_FLUSH); dput(sdp->sd_root_dir); dput(sdp->sd_master_dir); sdp->sd_root_dir = NULL; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index c4effff7cf55..64b29f7f6b4c 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -778,6 +778,7 @@ get_a_page: i_size_write(inode, size); inode->i_mtime = inode->i_atime = CURRENT_TIME; mark_inode_dirty(inode); + set_bit(QDF_REFRESH, &qd->qd_flags); return 0; unlock_out: @@ -879,7 +880,7 @@ out: gfs2_glock_dq_uninit(&ghs[qx]); mutex_unlock(&ip->i_inode.i_mutex); kfree(ghs); - gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl); + gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl, NORMAL_FLUSH); return error; } diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 7ad4094d68c0..94555d4c5698 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -454,7 +454,7 @@ void gfs2_recover_func(struct work_struct *work) struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); struct gfs2_log_header_host head; - struct gfs2_holder j_gh, ji_gh, t_gh; + struct gfs2_holder j_gh, ji_gh, thaw_gh; unsigned long t; int ro = 0; unsigned int pass; @@ -508,11 +508,11 @@ void gfs2_recover_func(struct work_struct *work) t = jiffies; - /* Acquire a shared hold on the transaction lock */ + /* Acquire a shared hold on the freeze lock */ - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, - LM_FLAG_NOEXP | LM_FLAG_PRIORITY | - GL_NOCACHE, &t_gh); + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, + LM_FLAG_NOEXP | LM_FLAG_PRIORITY, + &thaw_gh); if (error) goto fail_gunlock_ji; @@ -538,7 +538,7 @@ void gfs2_recover_func(struct work_struct *work) fs_warn(sdp, "jid=%u: Can't replay: read-only block " "device\n", jd->jd_jid); error = -EROFS; - goto fail_gunlock_tr; + goto fail_gunlock_thaw; } fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid); @@ -549,14 +549,14 @@ void gfs2_recover_func(struct work_struct *work) head.lh_blkno, pass); lops_after_scan(jd, error, pass); if (error) - goto fail_gunlock_tr; + goto fail_gunlock_thaw; } error = clean_journal(jd, &head); if (error) - goto fail_gunlock_tr; + goto fail_gunlock_thaw; - gfs2_glock_dq_uninit(&t_gh); + gfs2_glock_dq_uninit(&thaw_gh); t = DIV_ROUND_UP(jiffies - t, HZ); fs_info(sdp, "jid=%u: Journal replayed in %lus\n", jd->jd_jid, t); @@ -572,8 +572,8 @@ void gfs2_recover_func(struct work_struct *work) fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); goto done; -fail_gunlock_tr: - gfs2_glock_dq_uninit(&t_gh); +fail_gunlock_thaw: + gfs2_glock_dq_uninit(&thaw_gh); fail_gunlock_ji: if (jlocked) { gfs2_glock_dq_uninit(&ji_gh); @@ -587,7 +587,7 @@ fail: gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); done: clear_bit(JDF_RECOVERY, &jd->jd_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&jd->jd_flags, JDF_RECOVERY); } diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 281a7716e3f3..db629d1bd1bd 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -2001,7 +2001,7 @@ next_rgrp: } /* Flushing the log may release space */ if (loops == 2) - gfs2_log_flush(sdp, NULL); + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); } return -ENOSPC; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index de8afad89e51..1319b5c4ec68 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -399,7 +399,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) { struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); struct gfs2_glock *j_gl = ip->i_gl; - struct gfs2_holder t_gh; + struct gfs2_holder thaw_gh; struct gfs2_log_header_host head; int error; @@ -407,7 +407,8 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) if (error) return error; - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh); + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0, + &thaw_gh); if (error) goto fail_threads; @@ -433,13 +434,13 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - gfs2_glock_dq_uninit(&t_gh); + gfs2_glock_dq_uninit(&thaw_gh); return 0; fail: - t_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq_uninit(&t_gh); + thaw_gh.gh_flags |= GL_NOCACHE; + gfs2_glock_dq_uninit(&thaw_gh); fail_threads: kthread_stop(sdp->sd_quotad_process); kthread_stop(sdp->sd_logd_process); @@ -635,15 +636,21 @@ struct lfcc { */ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, - struct gfs2_holder *t_gh) + struct gfs2_holder *freeze_gh) { struct gfs2_inode *ip; struct gfs2_jdesc *jd; struct lfcc *lfcc; LIST_HEAD(list); struct gfs2_log_header_host lh; + struct gfs2_inode *dip = GFS2_I(sdp->sd_root_dir->d_inode); int error; + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, + &sdp->sd_freeze_root_gh); + if (error) + return error; + atomic_set(&sdp->sd_frozen_root, 1); list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL); if (!lfcc) { @@ -659,8 +666,8 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, list_add(&lfcc->list, &list); } - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED, - GL_NOCACHE, t_gh); + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE, + GL_NOCACHE, freeze_gh); list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { error = gfs2_jdesc_check(jd); @@ -676,7 +683,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, } if (error) - gfs2_glock_dq_uninit(t_gh); + gfs2_glock_dq_uninit(freeze_gh); out: while (!list_empty(&list)) { @@ -685,6 +692,11 @@ out: gfs2_glock_dq_uninit(&lfcc->gh); kfree(lfcc); } + if (error) { + atomic_dec(&sdp->sd_frozen_root); + wait_event(sdp->sd_frozen_root_wait, atomic_read(&sdp->sd_frozen_root) == 0); + gfs2_glock_dq_uninit(&sdp->sd_freeze_root_gh); + } return error; } @@ -742,7 +754,7 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) int ret = 0; if (wbc->sync_mode == WB_SYNC_ALL) - gfs2_log_flush(GFS2_SB(inode), ip->i_gl); + gfs2_log_flush(GFS2_SB(inode), ip->i_gl, NORMAL_FLUSH); if (bdi->dirty_exceeded) gfs2_ail1_flush(sdp, wbc); else @@ -822,9 +834,18 @@ out: static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) { - struct gfs2_holder t_gh; + struct gfs2_holder thaw_gh; int error; + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, GL_NOCACHE, + &thaw_gh); + if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + return error; + + down_write(&sdp->sd_log_flush_lock); + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + up_write(&sdp->sd_log_flush_lock); + kthread_stop(sdp->sd_quotad_process); kthread_stop(sdp->sd_logd_process); @@ -832,18 +853,11 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) gfs2_quota_sync(sdp->sd_vfs, 0); gfs2_statfs_sync(sdp->sd_vfs, 0); - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, - &t_gh); - if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) - return error; - - gfs2_meta_syncfs(sdp); - gfs2_log_shutdown(sdp); - - clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + gfs2_log_flush(sdp, NULL, SHUTDOWN_FLUSH); + gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); - if (t_gh.gh_gl) - gfs2_glock_dq_uninit(&t_gh); + if (thaw_gh.gh_gl) + gfs2_glock_dq_uninit(&thaw_gh); gfs2_quota_cleanup(sdp); @@ -900,7 +914,7 @@ restart: iput(sdp->sd_quota_inode); gfs2_glock_put(sdp->sd_rename_gl); - gfs2_glock_put(sdp->sd_trans_gl); + gfs2_glock_put(sdp->sd_freeze_gl); if (!sdp->sd_args.ar_spectator) { gfs2_glock_dq_uninit(&sdp->sd_journal_gh); @@ -935,8 +949,8 @@ static int gfs2_sync_fs(struct super_block *sb, int wait) struct gfs2_sbd *sdp = sb->s_fs_info; gfs2_quota_sync(sb, -1); - if (wait && sdp) - gfs2_log_flush(sdp, NULL); + if (wait && sdp && !atomic_read(&sdp->sd_log_freeze)) + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); return 0; } @@ -986,6 +1000,9 @@ static int gfs2_unfreeze(struct super_block *sb) struct gfs2_sbd *sdp = sb->s_fs_info; gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); + atomic_dec(&sdp->sd_frozen_root); + wait_event(sdp->sd_frozen_root_wait, atomic_read(&sdp->sd_frozen_root) == 0); + gfs2_glock_dq_uninit(&sdp->sd_freeze_root_gh); return 0; } @@ -1525,7 +1542,7 @@ static void gfs2_evict_inode(struct inode *inode) goto out_unlock; out_truncate: - gfs2_log_flush(sdp, ip->i_gl); + gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH); if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) { struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl); filemap_fdatawrite(metamapping); diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index de25d5577e5d..3ab566ba5696 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -240,8 +240,8 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len if (gltype > LM_TYPE_JOURNAL) return -EINVAL; - if (gltype == LM_TYPE_NONDISK && glnum == GFS2_TRANS_LOCK) - glops = &gfs2_trans_glops; + if (gltype == LM_TYPE_NONDISK && glnum == GFS2_FREEZE_LOCK) + glops = &gfs2_freeze_glops; else glops = gfs2_glops_list[gltype]; if (glops == NULL) @@ -333,7 +333,7 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len) set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); else if (val == 0) { clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); gfs2_glock_thaw(sdp); } else { ret = -EINVAL; @@ -407,6 +407,9 @@ int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid) struct gfs2_jdesc *jd; int rv; + /* Wait for our primary journal to be initialized */ + wait_for_completion(&sdp->sd_journal_ready); + spin_lock(&sdp->sd_jindex_spin); rv = -EBUSY; if (sdp->sd_jdesc->jd_jid == jid) @@ -482,7 +485,7 @@ static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len) rv = jid = -EINVAL; sdp->sd_lockstruct.ls_jid = jid; clear_bit(SDF_NOJOURNALID, &sdp->sd_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); out: spin_unlock(&sdp->sd_jindex_spin); diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index bead90d27bad..0546ab4e28e8 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -48,6 +48,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, tr->tr_blocks = blocks; tr->tr_revokes = revokes; tr->tr_reserved = 1; + tr->tr_alloced = 1; if (blocks) tr->tr_reserved += 6 + blocks; if (revokes) @@ -57,48 +58,22 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, INIT_LIST_HEAD(&tr->tr_buf); sb_start_intwrite(sdp->sd_vfs); - gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh); - - error = gfs2_glock_nq(&tr->tr_t_gh); - if (error) - goto fail_holder_uninit; error = gfs2_log_reserve(sdp, tr->tr_reserved); if (error) - goto fail_gunlock; + goto fail; current->journal_info = tr; return 0; -fail_gunlock: - gfs2_glock_dq(&tr->tr_t_gh); - -fail_holder_uninit: +fail: sb_end_intwrite(sdp->sd_vfs); - gfs2_holder_uninit(&tr->tr_t_gh); kfree(tr); return error; } -/** - * gfs2_log_release - Release a given number of log blocks - * @sdp: The GFS2 superblock - * @blks: The number of blocks - * - */ - -static void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) -{ - - atomic_add(blks, &sdp->sd_log_blks_free); - trace_gfs2_log_blocks(sdp, blks); - gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= - sdp->sd_jdesc->jd_blocks); - up_read(&sdp->sd_log_flush_lock); -} - static void gfs2_print_trans(const struct gfs2_trans *tr) { pr_warn("Transaction created at: %pSR\n", (void *)tr->tr_ip); @@ -119,11 +94,8 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) if (!tr->tr_touched) { gfs2_log_release(sdp, tr->tr_reserved); - if (tr->tr_t_gh.gh_gl) { - gfs2_glock_dq(&tr->tr_t_gh); - gfs2_holder_uninit(&tr->tr_t_gh); + if (tr->tr_alloced) kfree(tr); - } sb_end_intwrite(sdp->sd_vfs); return; } @@ -137,16 +109,12 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) gfs2_print_trans(tr); gfs2_log_commit(sdp, tr); - if (tr->tr_t_gh.gh_gl) { - gfs2_glock_dq(&tr->tr_t_gh); - gfs2_holder_uninit(&tr->tr_t_gh); - if (!tr->tr_attached) + if (tr->tr_alloced && !tr->tr_attached) kfree(tr); - } up_read(&sdp->sd_log_flush_lock); if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) - gfs2_log_flush(sdp, NULL); + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); sb_end_intwrite(sdp->sd_vfs); } diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c index caf89a7be0a1..e5b221de7de6 100644 --- a/fs/hfsplus/attributes.c +++ b/fs/hfsplus/attributes.c @@ -54,14 +54,11 @@ int hfsplus_attr_build_key(struct super_block *sb, hfsplus_btree_key *key, memset(key, 0, sizeof(struct hfsplus_attr_key)); key->attr.cnid = cpu_to_be32(cnid); if (name) { - len = strlen(name); - if (len > HFSPLUS_ATTR_MAX_STRLEN) { - pr_err("invalid xattr name's length\n"); - return -EINVAL; - } - hfsplus_asc2uni(sb, + int res = hfsplus_asc2uni(sb, (struct hfsplus_unistr *)&key->attr.key_name, - HFSPLUS_ATTR_MAX_STRLEN, name, len); + HFSPLUS_ATTR_MAX_STRLEN, name, strlen(name)); + if (res) + return res; len = be16_to_cpu(key->attr.key_name.length); } else { key->attr.key_name.length = 0; @@ -82,31 +79,6 @@ int hfsplus_attr_build_key(struct super_block *sb, hfsplus_btree_key *key, return 0; } -void hfsplus_attr_build_key_uni(hfsplus_btree_key *key, - u32 cnid, - struct hfsplus_attr_unistr *name) -{ - int ustrlen; - - memset(key, 0, sizeof(struct hfsplus_attr_key)); - ustrlen = be16_to_cpu(name->length); - key->attr.cnid = cpu_to_be32(cnid); - key->attr.key_name.length = cpu_to_be16(ustrlen); - ustrlen *= 2; - memcpy(key->attr.key_name.unicode, name->unicode, ustrlen); - - /* The length of the key, as stored in key_len field, does not include - * the size of the key_len field itself. - * So, offsetof(hfsplus_attr_key, key_name) is a trick because - * it takes into consideration key_len field (__be16) of - * hfsplus_attr_key structure instead of length field (__be16) of - * hfsplus_attr_unistr structure. - */ - key->key_len = - cpu_to_be16(offsetof(struct hfsplus_attr_key, key_name) + - ustrlen); -} - hfsplus_attr_entry *hfsplus_alloc_attr_entry(void) { return kmem_cache_alloc(hfsplus_attr_tree_cachep, GFP_KERNEL); diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index 11c860204520..759708fd9331 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -27,13 +27,13 @@ void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len) pagep = node->page + (off >> PAGE_CACHE_SHIFT); off &= ~PAGE_CACHE_MASK; - l = min(len, (int)PAGE_CACHE_SIZE - off); + l = min_t(int, len, PAGE_CACHE_SIZE - off); memcpy(buf, kmap(*pagep) + off, l); kunmap(*pagep); while ((len -= l) != 0) { buf += l; - l = min(len, (int)PAGE_CACHE_SIZE); + l = min_t(int, len, PAGE_CACHE_SIZE); memcpy(buf, kmap(*++pagep), l); kunmap(*pagep); } @@ -80,14 +80,14 @@ void hfs_bnode_write(struct hfs_bnode *node, void *buf, int off, int len) pagep = node->page + (off >> PAGE_CACHE_SHIFT); off &= ~PAGE_CACHE_MASK; - l = min(len, (int)PAGE_CACHE_SIZE - off); + l = min_t(int, len, PAGE_CACHE_SIZE - off); memcpy(kmap(*pagep) + off, buf, l); set_page_dirty(*pagep); kunmap(*pagep); while ((len -= l) != 0) { buf += l; - l = min(len, (int)PAGE_CACHE_SIZE); + l = min_t(int, len, PAGE_CACHE_SIZE); memcpy(kmap(*++pagep), buf, l); set_page_dirty(*pagep); kunmap(*pagep); @@ -110,13 +110,13 @@ void hfs_bnode_clear(struct hfs_bnode *node, int off, int len) pagep = node->page + (off >> PAGE_CACHE_SHIFT); off &= ~PAGE_CACHE_MASK; - l = min(len, (int)PAGE_CACHE_SIZE - off); + l = min_t(int, len, PAGE_CACHE_SIZE - off); memset(kmap(*pagep) + off, 0, l); set_page_dirty(*pagep); kunmap(*pagep); while ((len -= l) != 0) { - l = min(len, (int)PAGE_CACHE_SIZE); + l = min_t(int, len, PAGE_CACHE_SIZE); memset(kmap(*++pagep), 0, l); set_page_dirty(*pagep); kunmap(*pagep); @@ -142,14 +142,14 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst, dst &= ~PAGE_CACHE_MASK; if (src == dst) { - l = min(len, (int)PAGE_CACHE_SIZE - src); + l = min_t(int, len, PAGE_CACHE_SIZE - src); memcpy(kmap(*dst_page) + src, kmap(*src_page) + src, l); kunmap(*src_page); set_page_dirty(*dst_page); kunmap(*dst_page); while ((len -= l) != 0) { - l = min(len, (int)PAGE_CACHE_SIZE); + l = min_t(int, len, PAGE_CACHE_SIZE); memcpy(kmap(*++dst_page), kmap(*++src_page), l); kunmap(*src_page); set_page_dirty(*dst_page); @@ -251,7 +251,7 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) dst &= ~PAGE_CACHE_MASK; if (src == dst) { - l = min(len, (int)PAGE_CACHE_SIZE - src); + l = min_t(int, len, PAGE_CACHE_SIZE - src); memmove(kmap(*dst_page) + src, kmap(*src_page) + src, l); kunmap(*src_page); @@ -259,7 +259,7 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len) kunmap(*dst_page); while ((len -= l) != 0) { - l = min(len, (int)PAGE_CACHE_SIZE); + l = min_t(int, len, PAGE_CACHE_SIZE); memmove(kmap(*++dst_page), kmap(*++src_page), l); kunmap(*src_page); @@ -386,9 +386,8 @@ struct hfs_bnode *hfs_bnode_findhash(struct hfs_btree *tree, u32 cnid) struct hfs_bnode *node; if (cnid >= tree->node_count) { - pr_err("request for non-existent node " - "%d in B*Tree\n", - cnid); + pr_err("request for non-existent node %d in B*Tree\n", + cnid); return NULL; } @@ -409,9 +408,8 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) loff_t off; if (cnid >= tree->node_count) { - pr_err("request for non-existent node " - "%d in B*Tree\n", - cnid); + pr_err("request for non-existent node %d in B*Tree\n", + cnid); return NULL; } @@ -602,7 +600,7 @@ struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num) pagep = node->page; memset(kmap(*pagep) + node->page_offset, 0, - min((int)PAGE_CACHE_SIZE, (int)tree->node_size)); + min_t(int, PAGE_CACHE_SIZE, tree->node_size)); set_page_dirty(*pagep); kunmap(*pagep); for (i = 1; i < tree->pages_per_bnode; i++) { @@ -648,8 +646,8 @@ void hfs_bnode_put(struct hfs_bnode *node) if (test_bit(HFS_BNODE_DELETED, &node->flags)) { hfs_bnode_unhash(node); spin_unlock(&tree->hash_lock); - hfs_bnode_clear(node, 0, - PAGE_CACHE_SIZE * tree->pages_per_bnode); + if (hfs_bnode_need_zeroout(tree)) + hfs_bnode_clear(node, 0, tree->node_size); hfs_bmap_free(node); hfs_bnode_free(node); return; @@ -658,3 +656,16 @@ void hfs_bnode_put(struct hfs_bnode *node) } } +/* + * Unused nodes have to be zeroed if this is the catalog tree and + * a corresponding flag in the volume header is set. + */ +bool hfs_bnode_need_zeroout(struct hfs_btree *tree) +{ + struct super_block *sb = tree->inode->i_sb; + struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); + const u32 volume_attr = be32_to_cpu(sbi->s_vhdr->attributes); + + return tree->cnid == HFSPLUS_CAT_CNID && + volume_attr & HFSPLUS_VOL_UNUSED_NODE_FIX; +} diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index 0fcec8b2a90b..3345c7553edc 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -358,7 +358,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) u32 count; int res; - res = hfsplus_file_extend(inode); + res = hfsplus_file_extend(inode, hfs_bnode_need_zeroout(tree)); if (res) return ERR_PTR(res); hip->phys_size = inode->i_size = diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index bdec66522de3..610a3260bef1 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -12,6 +12,7 @@ #include <linux/fs.h> #include <linux/slab.h> #include <linux/random.h> +#include <linux/nls.h> #include "hfsplus_fs.h" #include "hfsplus_raw.h" @@ -127,7 +128,7 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; int len, err; - char strbuf[HFSPLUS_MAX_STRLEN + 1]; + char *strbuf; hfsplus_cat_entry entry; struct hfs_find_data fd; struct hfsplus_readdir_data *rd; @@ -139,6 +140,11 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx) err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); if (err) return err; + strbuf = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_MAX_STRLEN + 1, GFP_KERNEL); + if (!strbuf) { + err = -ENOMEM; + goto out; + } hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) @@ -193,7 +199,7 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx) hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); type = be16_to_cpu(entry.type); - len = HFSPLUS_MAX_STRLEN; + len = NLS_MAX_CHARSET_SIZE * HFSPLUS_MAX_STRLEN; err = hfsplus_uni2asc(sb, &fd.key->cat.name, strbuf, &len); if (err) goto out; @@ -212,13 +218,31 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx) be32_to_cpu(entry.folder.id), DT_DIR)) break; } else if (type == HFSPLUS_FILE) { + u16 mode; + unsigned type = DT_UNKNOWN; + if (fd.entrylength < sizeof(struct hfsplus_cat_file)) { pr_err("small file entry\n"); err = -EIO; goto out; } + + mode = be16_to_cpu(entry.file.permissions.mode); + if (S_ISREG(mode)) + type = DT_REG; + else if (S_ISLNK(mode)) + type = DT_LNK; + else if (S_ISFIFO(mode)) + type = DT_FIFO; + else if (S_ISCHR(mode)) + type = DT_CHR; + else if (S_ISBLK(mode)) + type = DT_BLK; + else if (S_ISSOCK(mode)) + type = DT_SOCK; + if (!dir_emit(ctx, strbuf, len, - be32_to_cpu(entry.file.id), DT_REG)) + be32_to_cpu(entry.file.id), type)) break; } else { pr_err("bad catalog entry type\n"); @@ -246,6 +270,7 @@ next: } memcpy(&rd->key, fd.key, sizeof(struct hfsplus_cat_key)); out: + kfree(strbuf); hfs_find_exit(&fd); return err; } diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index a7aafb35b624..feca524ce2a5 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -235,7 +235,7 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, if (iblock > hip->fs_blocks || !create) return -EIO; if (ablock >= hip->alloc_blocks) { - res = hfsplus_file_extend(inode); + res = hfsplus_file_extend(inode, false); if (res) return res; } @@ -425,7 +425,7 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid, return res; } -int hfsplus_file_extend(struct inode *inode) +int hfsplus_file_extend(struct inode *inode, bool zeroout) { struct super_block *sb = inode->i_sb; struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); @@ -436,10 +436,9 @@ int hfsplus_file_extend(struct inode *inode) if (sbi->alloc_file->i_size * 8 < sbi->total_blocks - sbi->free_blocks + 8) { /* extend alloc file */ - pr_err("extend alloc file! " - "(%llu,%u,%u)\n", - sbi->alloc_file->i_size * 8, - sbi->total_blocks, sbi->free_blocks); + pr_err("extend alloc file! (%llu,%u,%u)\n", + sbi->alloc_file->i_size * 8, + sbi->total_blocks, sbi->free_blocks); return -ENOSPC; } @@ -463,6 +462,12 @@ int hfsplus_file_extend(struct inode *inode) } } + if (zeroout) { + res = sb_issue_zeroout(sb, start, len, GFP_NOFS); + if (res) + goto out; + } + hfs_dbg(EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len); if (hip->alloc_blocks <= hip->first_blocks) { diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 83dc29286b10..eb5e059f481a 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -369,114 +369,119 @@ typedef int (*search_strategy_t)(struct hfs_bnode *, /* attributes.c */ int __init hfsplus_create_attr_tree_cache(void); void hfsplus_destroy_attr_tree_cache(void); +int hfsplus_attr_bin_cmp_key(const hfsplus_btree_key *k1, + const hfsplus_btree_key *k2); +int hfsplus_attr_build_key(struct super_block *sb, hfsplus_btree_key *key, + u32 cnid, const char *name); hfsplus_attr_entry *hfsplus_alloc_attr_entry(void); -void hfsplus_destroy_attr_entry(hfsplus_attr_entry *entry_p); -int hfsplus_attr_bin_cmp_key(const hfsplus_btree_key *, - const hfsplus_btree_key *); -int hfsplus_attr_build_key(struct super_block *, hfsplus_btree_key *, - u32, const char *); -void hfsplus_attr_build_key_uni(hfsplus_btree_key *key, - u32 cnid, - struct hfsplus_attr_unistr *name); -int hfsplus_find_attr(struct super_block *, u32, - const char *, struct hfs_find_data *); +void hfsplus_destroy_attr_entry(hfsplus_attr_entry *entry); +int hfsplus_find_attr(struct super_block *sb, u32 cnid, const char *name, + struct hfs_find_data *fd); int hfsplus_attr_exists(struct inode *inode, const char *name); -int hfsplus_create_attr(struct inode *, const char *, const void *, size_t); -int hfsplus_delete_attr(struct inode *, const char *); +int hfsplus_create_attr(struct inode *inode, const char *name, + const void *value, size_t size); +int hfsplus_delete_attr(struct inode *inode, const char *name); int hfsplus_delete_all_attrs(struct inode *dir, u32 cnid); /* bitmap.c */ -int hfsplus_block_allocate(struct super_block *, u32, u32, u32 *); -int hfsplus_block_free(struct super_block *, u32, u32); +int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, + u32 *max); +int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count); /* btree.c */ -u32 hfsplus_calc_btree_clump_size(u32, u32, u64, int); -struct hfs_btree *hfs_btree_open(struct super_block *, u32); -void hfs_btree_close(struct hfs_btree *); -int hfs_btree_write(struct hfs_btree *); -struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *); -void hfs_bmap_free(struct hfs_bnode *); +u32 hfsplus_calc_btree_clump_size(u32 block_size, u32 node_size, u64 sectors, + int file_id); +struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id); +void hfs_btree_close(struct hfs_btree *tree); +int hfs_btree_write(struct hfs_btree *tree); +struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree); +void hfs_bmap_free(struct hfs_bnode *node); /* bnode.c */ -void hfs_bnode_read(struct hfs_bnode *, void *, int, int); -u16 hfs_bnode_read_u16(struct hfs_bnode *, int); -u8 hfs_bnode_read_u8(struct hfs_bnode *, int); -void hfs_bnode_read_key(struct hfs_bnode *, void *, int); -void hfs_bnode_write(struct hfs_bnode *, void *, int, int); -void hfs_bnode_write_u16(struct hfs_bnode *, int, u16); -void hfs_bnode_clear(struct hfs_bnode *, int, int); -void hfs_bnode_copy(struct hfs_bnode *, int, - struct hfs_bnode *, int, int); -void hfs_bnode_move(struct hfs_bnode *, int, int, int); -void hfs_bnode_dump(struct hfs_bnode *); -void hfs_bnode_unlink(struct hfs_bnode *); -struct hfs_bnode *hfs_bnode_findhash(struct hfs_btree *, u32); -struct hfs_bnode *hfs_bnode_find(struct hfs_btree *, u32); -void hfs_bnode_unhash(struct hfs_bnode *); -void hfs_bnode_free(struct hfs_bnode *); -struct hfs_bnode *hfs_bnode_create(struct hfs_btree *, u32); -void hfs_bnode_get(struct hfs_bnode *); -void hfs_bnode_put(struct hfs_bnode *); +void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len); +u16 hfs_bnode_read_u16(struct hfs_bnode *node, int off); +u8 hfs_bnode_read_u8(struct hfs_bnode *node, int off); +void hfs_bnode_read_key(struct hfs_bnode *node, void *key, int off); +void hfs_bnode_write(struct hfs_bnode *node, void *buf, int off, int len); +void hfs_bnode_write_u16(struct hfs_bnode *node, int off, u16 data); +void hfs_bnode_clear(struct hfs_bnode *node, int off, int len); +void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst, + struct hfs_bnode *src_node, int src, int len); +void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len); +void hfs_bnode_dump(struct hfs_bnode *node); +void hfs_bnode_unlink(struct hfs_bnode *node); +struct hfs_bnode *hfs_bnode_findhash(struct hfs_btree *tree, u32 cnid); +void hfs_bnode_unhash(struct hfs_bnode *node); +struct hfs_bnode *hfs_bnode_find(struct hfs_btree *tree, u32 num); +void hfs_bnode_free(struct hfs_bnode *node); +struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num); +void hfs_bnode_get(struct hfs_bnode *node); +void hfs_bnode_put(struct hfs_bnode *node); +bool hfs_bnode_need_zeroout(struct hfs_btree *tree); /* brec.c */ -u16 hfs_brec_lenoff(struct hfs_bnode *, u16, u16 *); -u16 hfs_brec_keylen(struct hfs_bnode *, u16); -int hfs_brec_insert(struct hfs_find_data *, void *, int); -int hfs_brec_remove(struct hfs_find_data *); +u16 hfs_brec_lenoff(struct hfs_bnode *node, u16 rec, u16 *off); +u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec); +int hfs_brec_insert(struct hfs_find_data *fd, void *entry, int entry_len); +int hfs_brec_remove(struct hfs_find_data *fd); /* bfind.c */ -int hfs_find_init(struct hfs_btree *, struct hfs_find_data *); -void hfs_find_exit(struct hfs_find_data *); -int hfs_find_1st_rec_by_cnid(struct hfs_bnode *, - struct hfs_find_data *, - int *, int *, int *); -int hfs_find_rec_by_key(struct hfs_bnode *, - struct hfs_find_data *, - int *, int *, int *); -int __hfs_brec_find(struct hfs_bnode *, struct hfs_find_data *, - search_strategy_t); -int hfs_brec_find(struct hfs_find_data *, search_strategy_t); -int hfs_brec_read(struct hfs_find_data *, void *, int); -int hfs_brec_goto(struct hfs_find_data *, int); +int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd); +void hfs_find_exit(struct hfs_find_data *fd); +int hfs_find_1st_rec_by_cnid(struct hfs_bnode *bnode, struct hfs_find_data *fd, + int *begin, int *end, int *cur_rec); +int hfs_find_rec_by_key(struct hfs_bnode *bnode, struct hfs_find_data *fd, + int *begin, int *end, int *cur_rec); +int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd, + search_strategy_t rec_found); +int hfs_brec_find(struct hfs_find_data *fd, search_strategy_t do_key_compare); +int hfs_brec_read(struct hfs_find_data *fd, void *rec, int rec_len); +int hfs_brec_goto(struct hfs_find_data *fd, int cnt); /* catalog.c */ -int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *, - const hfsplus_btree_key *); -int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *, - const hfsplus_btree_key *); -void hfsplus_cat_build_key(struct super_block *sb, - hfsplus_btree_key *, u32, struct qstr *); -int hfsplus_find_cat(struct super_block *, u32, struct hfs_find_data *); -int hfsplus_create_cat(u32, struct inode *, struct qstr *, struct inode *); -int hfsplus_delete_cat(u32, struct inode *, struct qstr *); -int hfsplus_rename_cat(u32, struct inode *, struct qstr *, - struct inode *, struct qstr *); +int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *k1, + const hfsplus_btree_key *k2); +int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1, + const hfsplus_btree_key *k2); +void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key, + u32 parent, struct qstr *str); void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms); +int hfsplus_find_cat(struct super_block *sb, u32 cnid, + struct hfs_find_data *fd); +int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, + struct inode *inode); +int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str); +int hfsplus_rename_cat(u32 cnid, struct inode *src_dir, struct qstr *src_name, + struct inode *dst_dir, struct qstr *dst_name); /* dir.c */ extern const struct inode_operations hfsplus_dir_inode_operations; extern const struct file_operations hfsplus_dir_operations; /* extents.c */ -int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); -int hfsplus_ext_write_extent(struct inode *); -int hfsplus_get_block(struct inode *, sector_t, struct buffer_head *, int); -int hfsplus_free_fork(struct super_block *, u32, - struct hfsplus_fork_raw *, int); -int hfsplus_file_extend(struct inode *); -void hfsplus_file_truncate(struct inode *); +int hfsplus_ext_cmp_key(const hfsplus_btree_key *k1, + const hfsplus_btree_key *k2); +int hfsplus_ext_write_extent(struct inode *inode); +int hfsplus_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create); +int hfsplus_free_fork(struct super_block *sb, u32 cnid, + struct hfsplus_fork_raw *fork, int type); +int hfsplus_file_extend(struct inode *inode, bool zeroout); +void hfsplus_file_truncate(struct inode *inode); /* inode.c */ extern const struct address_space_operations hfsplus_aops; extern const struct address_space_operations hfsplus_btree_aops; extern const struct dentry_operations hfsplus_dentry_operations; -void hfsplus_inode_read_fork(struct inode *, struct hfsplus_fork_raw *); -void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *); -int hfsplus_cat_read_inode(struct inode *, struct hfs_find_data *); -int hfsplus_cat_write_inode(struct inode *); -struct inode *hfsplus_new_inode(struct super_block *, umode_t); -void hfsplus_delete_inode(struct inode *); +struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode); +void hfsplus_delete_inode(struct inode *inode); +void hfsplus_inode_read_fork(struct inode *inode, + struct hfsplus_fork_raw *fork); +void hfsplus_inode_write_fork(struct inode *inode, + struct hfsplus_fork_raw *fork); +int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd); +int hfsplus_cat_write_inode(struct inode *inode); int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); @@ -484,13 +489,17 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); /* options.c */ -int hfsplus_parse_options(char *, struct hfsplus_sb_info *); +void hfsplus_fill_defaults(struct hfsplus_sb_info *opts); int hfsplus_parse_options_remount(char *input, int *force); -void hfsplus_fill_defaults(struct hfsplus_sb_info *); -int hfsplus_show_options(struct seq_file *, struct dentry *); +int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi); +int hfsplus_show_options(struct seq_file *seq, struct dentry *root); + +/* part_tbl.c */ +int hfs_part_find(struct super_block *sb, sector_t *part_start, + sector_t *part_size); /* super.c */ -struct inode *hfsplus_iget(struct super_block *, unsigned long); +struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino); void hfsplus_mark_mdb_dirty(struct super_block *sb); /* tables.c */ @@ -499,23 +508,23 @@ extern u16 hfsplus_decompose_table[]; extern u16 hfsplus_compose_table[]; /* unicode.c */ -int hfsplus_strcasecmp(const struct hfsplus_unistr *, - const struct hfsplus_unistr *); -int hfsplus_strcmp(const struct hfsplus_unistr *, - const struct hfsplus_unistr *); -int hfsplus_uni2asc(struct super_block *, - const struct hfsplus_unistr *, char *, int *); -int hfsplus_asc2uni(struct super_block *, - struct hfsplus_unistr *, int, const char *, int); +int hfsplus_strcasecmp(const struct hfsplus_unistr *s1, + const struct hfsplus_unistr *s2); +int hfsplus_strcmp(const struct hfsplus_unistr *s1, + const struct hfsplus_unistr *s2); +int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, + char *astr, int *len_p); +int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, + int max_unistr_len, const char *astr, int len); int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str); -int hfsplus_compare_dentry(const struct dentry *parent, const struct dentry *dentry, - unsigned int len, const char *str, const struct qstr *name); +int hfsplus_compare_dentry(const struct dentry *parent, + const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name); /* wrapper.c */ -int hfsplus_read_wrapper(struct super_block *); -int hfs_part_find(struct super_block *, sector_t *, sector_t *); -int hfsplus_submit_bio(struct super_block *sb, sector_t sector, - void *buf, void **data, int rw); +int hfsplus_submit_bio(struct super_block *sb, sector_t sector, void *buf, + void **data, int rw); +int hfsplus_read_wrapper(struct super_block *sb); /* time macros */ #define __hfsp_mt2ut(t) (be32_to_cpu(t) - 2082844800U) diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h index 5a126828d85e..8298d0985f81 100644 --- a/fs/hfsplus/hfsplus_raw.h +++ b/fs/hfsplus/hfsplus_raw.h @@ -144,6 +144,7 @@ struct hfsplus_vh { #define HFSPLUS_VOL_NODEID_REUSED (1 << 12) #define HFSPLUS_VOL_JOURNALED (1 << 13) #define HFSPLUS_VOL_SOFTLOCK (1 << 15) +#define HFSPLUS_VOL_UNUSED_NODE_FIX (1 << 31) /* HFS+ BTree node descriptor */ struct hfs_bnode_desc { diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 68537e8b7a09..c90b72ee676d 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -173,9 +173,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) if (p) sbi->nls = load_nls(p); if (!sbi->nls) { - pr_err("unable to load " - "nls mapping \"%s\"\n", - p); + pr_err("unable to load nls mapping \"%s\"\n", + p); kfree(p); return 0; } @@ -232,8 +231,8 @@ int hfsplus_show_options(struct seq_file *seq, struct dentry *root) if (sbi->nls) seq_printf(seq, ",nls=%s", sbi->nls->charset); if (test_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags)) - seq_printf(seq, ",nodecompose"); + seq_puts(seq, ",nodecompose"); if (test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags)) - seq_printf(seq, ",nobarrier"); + seq_puts(seq, ",nobarrier"); return 0; } diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index a513d2d36be9..4cf2024b87da 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -131,9 +131,10 @@ static int hfsplus_system_write_inode(struct inode *inode) hfsplus_inode_write_fork(inode, fork); if (tree) { int err = hfs_btree_write(tree); + if (err) { pr_err("b-tree write err: %d, ino %lu\n", - err, inode->i_ino); + err, inode->i_ino); return err; } } diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 3f999649587f..cc6235671437 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -24,8 +24,8 @@ struct hfsplus_wd { u16 embed_count; }; -/* - * hfsplus_submit_bio - Perfrom block I/O +/** + * hfsplus_submit_bio - Perform block I/O * @sb: super block of volume for I/O * @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes * @buf: buffer for I/O @@ -231,10 +231,8 @@ reread: if (blocksize < HFSPLUS_SECTOR_SIZE || ((blocksize - 1) & blocksize)) goto out_free_backup_vhdr; sbi->alloc_blksz = blocksize; - sbi->alloc_blksz_shift = 0; - while ((blocksize >>= 1) != 0) - sbi->alloc_blksz_shift++; - blocksize = min(sbi->alloc_blksz, (u32)PAGE_SIZE); + sbi->alloc_blksz_shift = ilog2(blocksize); + blocksize = min_t(u32, sbi->alloc_blksz, PAGE_SIZE); /* * Align block size to block offset. diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index 4e27edc082a4..d98094a9f476 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -8,6 +8,7 @@ #include "hfsplus_fs.h" #include <linux/posix_acl_xattr.h> +#include <linux/nls.h> #include "xattr.h" #include "acl.h" @@ -66,10 +67,10 @@ static void hfsplus_init_header_node(struct inode *attr_file, char *bmp; u32 used_nodes; u32 used_bmp_bytes; - loff_t tmp; + u64 tmp; hfs_dbg(ATTR_MOD, "init_hdr_attr_file: clump %u, node_size %u\n", - clump_size, node_size); + clump_size, node_size); /* The end of the node contains list of record offsets */ rec_offsets = (__be16 *)(buf + node_size); @@ -195,7 +196,7 @@ check_attr_tree_state_again: } while (hip->alloc_blocks < hip->clump_blocks) { - err = hfsplus_file_extend(attr_file); + err = hfsplus_file_extend(attr_file, false); if (unlikely(err)) { pr_err("failed to extend attributes file\n"); goto end_attr_file_creation; @@ -645,8 +646,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) struct hfs_find_data fd; u16 key_len = 0; struct hfsplus_attr_key attr_key; - char strbuf[HFSPLUS_ATTR_MAX_STRLEN + - XATTR_MAC_OSX_PREFIX_LEN + 1] = {0}; + char *strbuf; int xattr_name_len; if ((!S_ISREG(inode->i_mode) && @@ -666,6 +666,13 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) return err; } + strbuf = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + + XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL); + if (!strbuf) { + res = -ENOMEM; + goto out; + } + err = hfsplus_find_attr(inode->i_sb, inode->i_ino, NULL, &fd); if (err) { if (err == -ENOENT) { @@ -692,7 +699,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) if (be32_to_cpu(attr_key.cnid) != inode->i_ino) goto end_listxattr; - xattr_name_len = HFSPLUS_ATTR_MAX_STRLEN; + xattr_name_len = NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN; if (hfsplus_uni2asc(inode->i_sb, (const struct hfsplus_unistr *)&fd.key->attr.key_name, strbuf, &xattr_name_len)) { @@ -718,6 +725,8 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) } end_listxattr: + kfree(strbuf); +out: hfs_find_exit(&fd); return res; } @@ -797,47 +806,55 @@ end_removexattr: static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size, int type) { - char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + - XATTR_MAC_OSX_PREFIX_LEN + 1] = {0}; - size_t len = strlen(name); + char *xattr_name; + int res; if (!strcmp(name, "")) return -EINVAL; - if (len > HFSPLUS_ATTR_MAX_STRLEN) - return -EOPNOTSUPP; - /* * Don't allow retrieving properly prefixed attributes * by prepending them with "osx." */ if (is_known_namespace(name)) return -EOPNOTSUPP; + xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + + XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL); + if (!xattr_name) + return -ENOMEM; + strcpy(xattr_name, XATTR_MAC_OSX_PREFIX); + strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name); - return hfsplus_getxattr(dentry, xattr_name, buffer, size); + res = hfsplus_getxattr(dentry, xattr_name, buffer, size); + kfree(xattr_name); + return res; } static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name, const void *buffer, size_t size, int flags, int type) { - char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + - XATTR_MAC_OSX_PREFIX_LEN + 1] = {0}; - size_t len = strlen(name); + char *xattr_name; + int res; if (!strcmp(name, "")) return -EINVAL; - if (len > HFSPLUS_ATTR_MAX_STRLEN) - return -EOPNOTSUPP; - /* * Don't allow setting properly prefixed attributes * by prepending them with "osx." */ if (is_known_namespace(name)) return -EOPNOTSUPP; + xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + + XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL); + if (!xattr_name) + return -ENOMEM; + strcpy(xattr_name, XATTR_MAC_OSX_PREFIX); + strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name); - return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags); + res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags); + kfree(xattr_name); + return res; } static size_t hfsplus_osx_listxattr(struct dentry *dentry, char *list, diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c index 00722765ea79..6ec5e107691f 100644 --- a/fs/hfsplus/xattr_security.c +++ b/fs/hfsplus/xattr_security.c @@ -7,6 +7,8 @@ */ #include <linux/security.h> +#include <linux/nls.h> + #include "hfsplus_fs.h" #include "xattr.h" #include "acl.h" @@ -14,37 +16,43 @@ static int hfsplus_security_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size, int type) { - char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; - size_t len = strlen(name); + char *xattr_name; + int res; if (!strcmp(name, "")) return -EINVAL; - if (len + XATTR_SECURITY_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN) - return -EOPNOTSUPP; - + xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1, + GFP_KERNEL); + if (!xattr_name) + return -ENOMEM; strcpy(xattr_name, XATTR_SECURITY_PREFIX); strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name); - return hfsplus_getxattr(dentry, xattr_name, buffer, size); + res = hfsplus_getxattr(dentry, xattr_name, buffer, size); + kfree(xattr_name); + return res; } static int hfsplus_security_setxattr(struct dentry *dentry, const char *name, const void *buffer, size_t size, int flags, int type) { - char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; - size_t len = strlen(name); + char *xattr_name; + int res; if (!strcmp(name, "")) return -EINVAL; - if (len + XATTR_SECURITY_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN) - return -EOPNOTSUPP; - + xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1, + GFP_KERNEL); + if (!xattr_name) + return -ENOMEM; strcpy(xattr_name, XATTR_SECURITY_PREFIX); strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name); - return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags); + res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags); + kfree(xattr_name); + return res; } static size_t hfsplus_security_listxattr(struct dentry *dentry, char *list, @@ -62,31 +70,30 @@ static int hfsplus_initxattrs(struct inode *inode, void *fs_info) { const struct xattr *xattr; - char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; - size_t xattr_name_len; + char *xattr_name; int err = 0; + xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1, + GFP_KERNEL); + if (!xattr_name) + return -ENOMEM; for (xattr = xattr_array; xattr->name != NULL; xattr++) { - xattr_name_len = strlen(xattr->name); - if (xattr_name_len == 0) + if (!strcmp(xattr->name, "")) continue; - if (xattr_name_len + XATTR_SECURITY_PREFIX_LEN > - HFSPLUS_ATTR_MAX_STRLEN) - return -EOPNOTSUPP; - strcpy(xattr_name, XATTR_SECURITY_PREFIX); strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, xattr->name); memset(xattr_name + - XATTR_SECURITY_PREFIX_LEN + xattr_name_len, 0, 1); + XATTR_SECURITY_PREFIX_LEN + strlen(xattr->name), 0, 1); err = __hfsplus_setxattr(inode, xattr_name, xattr->value, xattr->value_len, 0); if (err) break; } + kfree(xattr_name); return err; } diff --git a/fs/hfsplus/xattr_trusted.c b/fs/hfsplus/xattr_trusted.c index 426cee277542..3c5f27e4746a 100644 --- a/fs/hfsplus/xattr_trusted.c +++ b/fs/hfsplus/xattr_trusted.c @@ -6,43 +6,51 @@ * Handler for trusted extended attributes. */ +#include <linux/nls.h> + #include "hfsplus_fs.h" #include "xattr.h" static int hfsplus_trusted_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size, int type) { - char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; - size_t len = strlen(name); + char *xattr_name; + int res; if (!strcmp(name, "")) return -EINVAL; - if (len + XATTR_TRUSTED_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN) - return -EOPNOTSUPP; - + xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1, + GFP_KERNEL); + if (!xattr_name) + return -ENOMEM; strcpy(xattr_name, XATTR_TRUSTED_PREFIX); strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name); - return hfsplus_getxattr(dentry, xattr_name, buffer, size); + res = hfsplus_getxattr(dentry, xattr_name, buffer, size); + kfree(xattr_name); + return res; } static int hfsplus_trusted_setxattr(struct dentry *dentry, const char *name, const void *buffer, size_t size, int flags, int type) { - char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; - size_t len = strlen(name); + char *xattr_name; + int res; if (!strcmp(name, "")) return -EINVAL; - if (len + XATTR_TRUSTED_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN) - return -EOPNOTSUPP; - + xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1, + GFP_KERNEL); + if (!xattr_name) + return -ENOMEM; strcpy(xattr_name, XATTR_TRUSTED_PREFIX); strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name); - return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags); + res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags); + kfree(xattr_name); + return res; } static size_t hfsplus_trusted_listxattr(struct dentry *dentry, char *list, diff --git a/fs/hfsplus/xattr_user.c b/fs/hfsplus/xattr_user.c index e34016561ae0..2b625a538b64 100644 --- a/fs/hfsplus/xattr_user.c +++ b/fs/hfsplus/xattr_user.c @@ -6,43 +6,51 @@ * Handler for user extended attributes. */ +#include <linux/nls.h> + #include "hfsplus_fs.h" #include "xattr.h" static int hfsplus_user_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size, int type) { - char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; - size_t len = strlen(name); + char *xattr_name; + int res; if (!strcmp(name, "")) return -EINVAL; - if (len + XATTR_USER_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN) - return -EOPNOTSUPP; - + xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1, + GFP_KERNEL); + if (!xattr_name) + return -ENOMEM; strcpy(xattr_name, XATTR_USER_PREFIX); strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name); - return hfsplus_getxattr(dentry, xattr_name, buffer, size); + res = hfsplus_getxattr(dentry, xattr_name, buffer, size); + kfree(xattr_name); + return res; } static int hfsplus_user_setxattr(struct dentry *dentry, const char *name, const void *buffer, size_t size, int flags, int type) { - char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; - size_t len = strlen(name); + char *xattr_name; + int res; if (!strcmp(name, "")) return -EINVAL; - if (len + XATTR_USER_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN) - return -EOPNOTSUPP; - + xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1, + GFP_KERNEL); + if (!xattr_name) + return -ENOMEM; strcpy(xattr_name, XATTR_USER_PREFIX); strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name); - return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags); + res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags); + kfree(xattr_name); + return res; } static size_t hfsplus_user_listxattr(struct dentry *dentry, char *list, diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c index 58b5106186d0..f005046e1591 100644 --- a/fs/hpfs/alloc.c +++ b/fs/hpfs/alloc.c @@ -316,7 +316,7 @@ void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n) struct quad_buffer_head qbh; __le32 *bmp; struct hpfs_sb_info *sbi = hpfs_sb(s); - /*printk("2 - ");*/ + /*pr_info("2 - ");*/ if (!n) return; if (sec < 0x12) { hpfs_error(s, "Trying to free reserved sector %08x", sec); diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c index 139ef1684d07..8057fe4e6574 100644 --- a/fs/hpfs/buffer.c +++ b/fs/hpfs/buffer.c @@ -55,7 +55,7 @@ void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head if (bh != NULL) return bh->b_data; else { - printk("HPFS: hpfs_map_sector: read error\n"); + pr_err("%s(): read error\n", __func__); return NULL; } } @@ -76,7 +76,7 @@ void *hpfs_get_sector(struct super_block *s, unsigned secno, struct buffer_head set_buffer_uptodate(bh); return bh->b_data; } else { - printk("HPFS: hpfs_get_sector: getblk failed\n"); + pr_err("%s(): getblk failed\n", __func__); return NULL; } } @@ -93,7 +93,7 @@ void *hpfs_map_4sectors(struct super_block *s, unsigned secno, struct quad_buffe cond_resched(); if (secno & 3) { - printk("HPFS: hpfs_map_4sectors: unaligned read\n"); + pr_err("%s(): unaligned read\n", __func__); return NULL; } @@ -112,7 +112,7 @@ void *hpfs_map_4sectors(struct super_block *s, unsigned secno, struct quad_buffe qbh->data = data = kmalloc(2048, GFP_NOFS); if (!data) { - printk("HPFS: hpfs_map_4sectors: out of memory\n"); + pr_err("%s(): out of memory\n", __func__); goto bail4; } @@ -145,7 +145,7 @@ void *hpfs_get_4sectors(struct super_block *s, unsigned secno, hpfs_lock_assert(s); if (secno & 3) { - printk("HPFS: hpfs_get_4sectors: unaligned read\n"); + pr_err("%s(): unaligned read\n", __func__); return NULL; } @@ -161,7 +161,7 @@ void *hpfs_get_4sectors(struct super_block *s, unsigned secno, } if (!(qbh->data = kmalloc(2048, GFP_NOFS))) { - printk("HPFS: hpfs_get_4sectors: out of memory\n"); + pr_err("%s(): out of memory\n", __func__); goto bail4; } return qbh->data; diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 292b1acb9b81..2a8e07425de0 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -36,7 +36,7 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence) mutex_lock(&i->i_mutex); hpfs_lock(s); - /*printk("dir lseek\n");*/ + /*pr_info("dir lseek\n");*/ if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok; pos = ((loff_t) hpfs_de_as_down_as_possible(s, hpfs_inode->i_dno) << 4) + 1; while (pos != new_off) { @@ -51,7 +51,7 @@ ok: mutex_unlock(&i->i_mutex); return new_off; fail: - /*printk("illegal lseek: %016llx\n", new_off);*/ + /*pr_warn("illegal lseek: %016llx\n", new_off);*/ hpfs_unlock(s); mutex_unlock(&i->i_mutex); return -ESPIPE; @@ -127,7 +127,7 @@ static int hpfs_readdir(struct file *file, struct dir_context *ctx) if (ctx->pos == 12) goto out; if (ctx->pos == 3 || ctx->pos == 4 || ctx->pos == 5) { - printk("HPFS: warning: pos==%d\n",(int)ctx->pos); + pr_err("pos==%d\n", (int)ctx->pos); goto out; } if (ctx->pos == 0) { diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c index 4364b2a02c5d..f36fc010fccb 100644 --- a/fs/hpfs/dnode.c +++ b/fs/hpfs/dnode.c @@ -17,7 +17,7 @@ static loff_t get_pos(struct dnode *d, struct hpfs_dirent *fde) if (de == fde) return ((loff_t) le32_to_cpu(d->self) << 4) | (loff_t)i; i++; } - printk("HPFS: get_pos: not_found\n"); + pr_info("%s(): not_found\n", __func__); return ((loff_t)le32_to_cpu(d->self) << 4) | (loff_t)1; } @@ -32,7 +32,7 @@ void hpfs_add_pos(struct inode *inode, loff_t *pos) if (hpfs_inode->i_rddir_off[i] == pos) return; if (!(i&0x0f)) { if (!(ppos = kmalloc((i+0x11) * sizeof(loff_t*), GFP_NOFS))) { - printk("HPFS: out of memory for position list\n"); + pr_err("out of memory for position list\n"); return; } if (hpfs_inode->i_rddir_off) { @@ -63,7 +63,8 @@ void hpfs_del_pos(struct inode *inode, loff_t *pos) } return; not_f: - /*printk("HPFS: warning: position pointer %p->%08x not found\n", pos, (int)*pos);*/ + /*pr_warn("position pointer %p->%08x not found\n", + pos, (int)*pos);*/ return; } @@ -92,8 +93,11 @@ static void hpfs_pos_ins(loff_t *p, loff_t d, loff_t c) { if ((*p & ~0x3f) == (d & ~0x3f) && (*p & 0x3f) >= (d & 0x3f)) { int n = (*p & 0x3f) + c; - if (n > 0x3f) printk("HPFS: hpfs_pos_ins: %08x + %d\n", (int)*p, (int)c >> 8); - else *p = (*p & ~0x3f) | n; + if (n > 0x3f) + pr_err("%s(): %08x + %d\n", + __func__, (int)*p, (int)c >> 8); + else + *p = (*p & ~0x3f) | n; } } @@ -101,8 +105,11 @@ static void hpfs_pos_del(loff_t *p, loff_t d, loff_t c) { if ((*p & ~0x3f) == (d & ~0x3f) && (*p & 0x3f) >= (d & 0x3f)) { int n = (*p & 0x3f) - c; - if (n < 1) printk("HPFS: hpfs_pos_ins: %08x - %d\n", (int)*p, (int)c >> 8); - else *p = (*p & ~0x3f) | n; + if (n < 1) + pr_err("%s(): %08x - %d\n", + __func__, (int)*p, (int)c >> 8); + else + *p = (*p & ~0x3f) | n; } } @@ -239,12 +246,12 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno, struct fnode *fnode; int c1, c2 = 0; if (!(nname = kmalloc(256, GFP_NOFS))) { - printk("HPFS: out of memory, can't add to dnode\n"); + pr_err("out of memory, can't add to dnode\n"); return 1; } go_up: if (namelen >= 256) { - hpfs_error(i->i_sb, "hpfs_add_to_dnode: namelen == %d", namelen); + hpfs_error(i->i_sb, "%s(): namelen == %d", __func__, namelen); kfree(nd); kfree(nname); return 1; @@ -281,7 +288,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno, not be any error while splitting dnodes, otherwise the whole directory, not only file we're adding, would be lost. */ - printk("HPFS: out of memory for dnode splitting\n"); + pr_err("out of memory for dnode splitting\n"); hpfs_brelse4(&qbh); kfree(nname); return 1; @@ -597,7 +604,7 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) if (!de_next->down) goto endm; ndown = de_down_pointer(de_next); if (!(de_cp = kmalloc(le16_to_cpu(de->length), GFP_NOFS))) { - printk("HPFS: out of memory for dtree balancing\n"); + pr_err("out of memory for dtree balancing\n"); goto endm; } memcpy(de_cp, de, le16_to_cpu(de->length)); @@ -612,7 +619,8 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) hpfs_brelse4(&qbh1); } hpfs_add_to_dnode(i, ndown, de_cp->name, de_cp->namelen, de_cp, de_cp->down ? de_down_pointer(de_cp) : 0); - /*printk("UP-TO-DNODE: %08x (ndown = %08x, down = %08x, dno = %08x)\n", up, ndown, down, dno);*/ + /*pr_info("UP-TO-DNODE: %08x (ndown = %08x, down = %08x, dno = %08x)\n", + up, ndown, down, dno);*/ dno = up; kfree(de_cp); goto try_it_again; @@ -637,15 +645,15 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) if (!dlp && down) { if (le32_to_cpu(d1->first_free) > 2044) { if (hpfs_sb(i->i_sb)->sb_chk >= 2) { - printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n"); - printk("HPFS: warning: terminating balancing operation\n"); + pr_err("unbalanced dnode tree, see hpfs.txt 4 more info\n"); + pr_err("terminating balancing operation\n"); } hpfs_brelse4(&qbh1); goto endm; } if (hpfs_sb(i->i_sb)->sb_chk >= 2) { - printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n"); - printk("HPFS: warning: goin'on\n"); + pr_err("unbalanced dnode tree, see hpfs.txt 4 more info\n"); + pr_err("goin'on\n"); } le16_add_cpu(&del->length, 4); del->down = 1; @@ -659,7 +667,7 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) *(__le32 *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down); } else goto endm; if (!(de_cp = kmalloc(le16_to_cpu(de_prev->length), GFP_NOFS))) { - printk("HPFS: out of memory for dtree balancing\n"); + pr_err("out of memory for dtree balancing\n"); hpfs_brelse4(&qbh1); goto endm; } @@ -1000,7 +1008,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno, int d1, d2 = 0; name1 = f->name; if (!(name2 = kmalloc(256, GFP_NOFS))) { - printk("HPFS: out of memory, can't map dirent\n"); + pr_err("out of memory, can't map dirent\n"); return NULL; } if (f->len <= 15) diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c index bcaafcd2666a..ce3f98ba993a 100644 --- a/fs/hpfs/ea.c +++ b/fs/hpfs/ea.c @@ -51,7 +51,7 @@ static char *get_indirect_ea(struct super_block *s, int ano, secno a, int size) { char *ret; if (!(ret = kmalloc(size + 1, GFP_NOFS))) { - printk("HPFS: out of memory for EA\n"); + pr_err("out of memory for EA\n"); return NULL; } if (hpfs_ea_read(s, a, ano, 0, size, ret)) { @@ -139,7 +139,7 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si if (ea_indirect(ea)) return get_indirect_ea(s, ea_in_anode(ea), ea_sec(ea), *size = ea_len(ea)); if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) { - printk("HPFS: out of memory for EA\n"); + pr_err("out of memory for EA\n"); return NULL; } memcpy(ret, ea_data(ea), ea_valuelen(ea)); @@ -165,7 +165,7 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si if (ea_indirect(ea)) return get_indirect_ea(s, ea_in_anode(ea), ea_sec(ea), *size = ea_len(ea)); if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) { - printk("HPFS: out of memory for EA\n"); + pr_err("out of memory for EA\n"); return NULL; } if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea_valuelen(ea), ret)) { diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 3ba49c080e42..b63b75fa00e7 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -8,6 +8,11 @@ //#define DBG //#define DEBUG_LOCKS +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/mutex.h> #include <linux/pagemap.h> diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 50a427313835..7ce4b74234a1 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -183,7 +183,8 @@ void hpfs_write_inode(struct inode *i) struct inode *parent; if (i->i_ino == hpfs_sb(i->i_sb)->sb_root) return; if (hpfs_inode->i_rddir_off && !atomic_read(&i->i_count)) { - if (*hpfs_inode->i_rddir_off) printk("HPFS: write_inode: some position still there\n"); + if (*hpfs_inode->i_rddir_off) + pr_err("write_inode: some position still there\n"); kfree(hpfs_inode->i_rddir_off); hpfs_inode->i_rddir_off = NULL; } diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index 3aa66ae1031e..442770edcdc7 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -65,12 +65,13 @@ unsigned char *hpfs_load_code_page(struct super_block *s, secno cps) struct code_page_directory *cp = hpfs_map_sector(s, cps, &bh, 0); if (!cp) return NULL; if (le32_to_cpu(cp->magic) != CP_DIR_MAGIC) { - printk("HPFS: Code page directory magic doesn't match (magic = %08x)\n", le32_to_cpu(cp->magic)); + pr_err("Code page directory magic doesn't match (magic = %08x)\n", + le32_to_cpu(cp->magic)); brelse(bh); return NULL; } if (!le32_to_cpu(cp->n_code_pages)) { - printk("HPFS: n_code_pages == 0\n"); + pr_err("n_code_pages == 0\n"); brelse(bh); return NULL; } @@ -79,19 +80,19 @@ unsigned char *hpfs_load_code_page(struct super_block *s, secno cps) brelse(bh); if (cpi >= 3) { - printk("HPFS: Code page index out of array\n"); + pr_err("Code page index out of array\n"); return NULL; } if (!(cpd = hpfs_map_sector(s, cpds, &bh, 0))) return NULL; if (le16_to_cpu(cpd->offs[cpi]) > 0x178) { - printk("HPFS: Code page index out of sector\n"); + pr_err("Code page index out of sector\n"); brelse(bh); return NULL; } ptr = (unsigned char *)cpd + le16_to_cpu(cpd->offs[cpi]) + 6; if (!(cp_table = kmalloc(256, GFP_KERNEL))) { - printk("HPFS: out of memory for code page table\n"); + pr_err("out of memory for code page table\n"); brelse(bh); return NULL; } @@ -114,7 +115,7 @@ __le32 *hpfs_load_bitmap_directory(struct super_block *s, secno bmp) int i; __le32 *b; if (!(b = kmalloc(n * 512, GFP_KERNEL))) { - printk("HPFS: can't allocate memory for bitmap directory\n"); + pr_err("can't allocate memory for bitmap directory\n"); return NULL; } for (i=0;i<n;i++) { @@ -281,7 +282,9 @@ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno, hpfs_error(s, "dnode %08x does not end with \\377 entry", secno); goto bail; } - if (b == 3) printk("HPFS: warning: unbalanced dnode tree, dnode %08x; see hpfs.txt 4 more info\n", secno); + if (b == 3) + pr_err("unbalanced dnode tree, dnode %08x; see hpfs.txt 4 more info\n", + secno); } return dnode; bail: diff --git a/fs/hpfs/name.c b/fs/hpfs/name.c index 9acdf338def0..b00d396d22c6 100644 --- a/fs/hpfs/name.c +++ b/fs/hpfs/name.c @@ -56,14 +56,15 @@ unsigned char *hpfs_translate_name(struct super_block *s, unsigned char *from, unsigned char *to; int i; if (hpfs_sb(s)->sb_chk >= 2) if (hpfs_is_name_long(from, len) != lng) { - printk("HPFS: Long name flag mismatch - name "); - for (i=0; i<len; i++) printk("%c", from[i]); - printk(" misidentified as %s.\n", lng ? "short" : "long"); - printk("HPFS: It's nothing serious. It could happen because of bug in OS/2.\nHPFS: Set checks=normal to disable this message.\n"); + pr_err("Long name flag mismatch - name "); + for (i = 0; i < len; i++) + pr_cont("%c", from[i]); + pr_cont(" misidentified as %s.\n", lng ? "short" : "long"); + pr_err("It's nothing serious. It could happen because of bug in OS/2.\nSet checks=normal to disable this message.\n"); } if (!lc) return from; if (!(to = kmalloc(len, GFP_KERNEL))) { - printk("HPFS: can't allocate memory for name conversion buffer\n"); + pr_err("can't allocate memory for name conversion buffer\n"); return from; } for (i = 0; i < len; i++) to[i] = locase(hpfs_sb(s)->sb_cp_table,from[i]); diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 1b39afdd86fd..bdbc2c3080a4 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -404,7 +404,7 @@ again: d_rehash(dentry); } else { struct iattr newattrs; - /*printk("HPFS: truncating file before delete.\n");*/ + /*pr_info("truncating file before delete.\n");*/ newattrs.ia_size = 0; newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; err = notify_change(dentry, &newattrs, NULL); diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index fe3463a43236..7cd00d3a7c9b 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -62,22 +62,26 @@ void hpfs_error(struct super_block *s, const char *fmt, ...) vsnprintf(err_buf, sizeof(err_buf), fmt, args); va_end(args); - printk("HPFS: filesystem error: %s", err_buf); + pr_err("filesystem error: %s", err_buf); if (!hpfs_sb(s)->sb_was_error) { if (hpfs_sb(s)->sb_err == 2) { - printk("; crashing the system because you wanted it\n"); + pr_cont("; crashing the system because you wanted it\n"); mark_dirty(s, 0); panic("HPFS panic"); } else if (hpfs_sb(s)->sb_err == 1) { - if (s->s_flags & MS_RDONLY) printk("; already mounted read-only\n"); + if (s->s_flags & MS_RDONLY) + pr_cont("; already mounted read-only\n"); else { - printk("; remounting read-only\n"); + pr_cont("; remounting read-only\n"); mark_dirty(s, 0); s->s_flags |= MS_RDONLY; } - } else if (s->s_flags & MS_RDONLY) printk("; going on - but anything won't be destroyed because it's read-only\n"); - else printk("; corrupted filesystem mounted read/write - your computer will explode within 20 seconds ... but you wanted it so!\n"); - } else printk("\n"); + } else if (s->s_flags & MS_RDONLY) + pr_cont("; going on - but anything won't be destroyed because it's read-only\n"); + else + pr_cont("; corrupted filesystem mounted read/write - your computer will explode within 20 seconds ... but you wanted it so!\n"); + } else + pr_cont("\n"); hpfs_sb(s)->sb_was_error = 1; } @@ -292,7 +296,7 @@ static int parse_opts(char *opts, kuid_t *uid, kgid_t *gid, umode_t *umask, if (!opts) return 1; - /*printk("Parsing opts: '%s'\n",opts);*/ + /*pr_info("Parsing opts: '%s'\n",opts);*/ while ((p = strsep(&opts, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; @@ -387,7 +391,7 @@ static int parse_opts(char *opts, kuid_t *uid, kgid_t *gid, umode_t *umask, static inline void hpfs_help(void) { - printk("\n\ + pr_info("\n\ HPFS filesystem options:\n\ help do not mount and display this text\n\ uid=xxx set uid of files that don't have uid specified in eas\n\ @@ -434,7 +438,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) if (!(o = parse_opts(data, &uid, &gid, &umask, &lowercase, &eas, &chk, &errs, &chkdsk, ×hift))) { - printk("HPFS: bad mount options.\n"); + pr_err("bad mount options.\n"); goto out_err; } if (o == 2) { @@ -442,7 +446,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) goto out_err; } if (timeshift != sbi->sb_timeshift) { - printk("HPFS: timeshift can't be changed using remount.\n"); + pr_err("timeshift can't be changed using remount.\n"); goto out_err; } @@ -523,7 +527,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) if (!(o = parse_opts(options, &uid, &gid, &umask, &lowercase, &eas, &chk, &errs, &chkdsk, ×hift))) { - printk("HPFS: bad mount options.\n"); + pr_err("bad mount options.\n"); goto bail0; } if (o==2) { @@ -542,16 +546,17 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) if (/*le16_to_cpu(bootblock->magic) != BB_MAGIC ||*/ le32_to_cpu(superblock->magic) != SB_MAGIC || le32_to_cpu(spareblock->magic) != SP_MAGIC) { - if (!silent) printk("HPFS: Bad magic ... probably not HPFS\n"); + if (!silent) + pr_err("Bad magic ... probably not HPFS\n"); goto bail4; } /* Check version */ if (!(s->s_flags & MS_RDONLY) && superblock->funcversion != 2 && superblock->funcversion != 3) { - printk("HPFS: Bad version %d,%d. Mount readonly to go around\n", + pr_err("Bad version %d,%d. Mount readonly to go around\n", (int)superblock->version, (int)superblock->funcversion); - printk("HPFS: please try recent version of HPFS driver at http://artax.karlin.mff.cuni.cz/~mikulas/vyplody/hpfs/index-e.cgi and if it still can't understand this format, contact author - mikulas@artax.karlin.mff.cuni.cz\n"); + pr_err("please try recent version of HPFS driver at http://artax.karlin.mff.cuni.cz/~mikulas/vyplody/hpfs/index-e.cgi and if it still can't understand this format, contact author - mikulas@artax.karlin.mff.cuni.cz\n"); goto bail4; } @@ -597,7 +602,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) /* Check for general fs errors*/ if (spareblock->dirty && !spareblock->old_wrote) { if (errs == 2) { - printk("HPFS: Improperly stopped, not mounted\n"); + pr_err("Improperly stopped, not mounted\n"); goto bail4; } hpfs_error(s, "improperly stopped"); @@ -611,22 +616,25 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) if (spareblock->hotfixes_used || spareblock->n_spares_used) { if (errs >= 2) { - printk("HPFS: Hotfixes not supported here, try chkdsk\n"); + pr_err("Hotfixes not supported here, try chkdsk\n"); mark_dirty(s, 0); goto bail4; } hpfs_error(s, "hotfixes not supported here, try chkdsk"); - if (errs == 0) printk("HPFS: Proceeding, but your filesystem will be probably corrupted by this driver...\n"); - else printk("HPFS: This driver may read bad files or crash when operating on disk with hotfixes.\n"); + if (errs == 0) + pr_err("Proceeding, but your filesystem will be probably corrupted by this driver...\n"); + else + pr_err("This driver may read bad files or crash when operating on disk with hotfixes.\n"); } if (le32_to_cpu(spareblock->n_dnode_spares) != le32_to_cpu(spareblock->n_dnode_spares_free)) { if (errs >= 2) { - printk("HPFS: Spare dnodes used, try chkdsk\n"); + pr_err("Spare dnodes used, try chkdsk\n"); mark_dirty(s, 0); goto bail4; } hpfs_error(s, "warning: spare dnodes used, try chkdsk"); - if (errs == 0) printk("HPFS: Proceeding, but your filesystem could be corrupted if you delete files or directories\n"); + if (errs == 0) + pr_err("Proceeding, but your filesystem could be corrupted if you delete files or directories\n"); } if (chk) { unsigned a; @@ -645,12 +653,13 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) goto bail4; } sbi->sb_dirband_size = a; - } else printk("HPFS: You really don't want any checks? You are crazy...\n"); + } else + pr_err("You really don't want any checks? You are crazy...\n"); /* Load code page table */ if (le32_to_cpu(spareblock->n_code_pages)) if (!(sbi->sb_cp_table = hpfs_load_code_page(s, le32_to_cpu(spareblock->code_page_dir)))) - printk("HPFS: Warning: code page support is disabled\n"); + pr_err("code page support is disabled\n"); brelse(bh2); brelse(bh1); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index e19d4c0cacae..1e2872b25343 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -6,6 +6,8 @@ * Copyright (C) 2002 Linus Torvalds. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/thread_info.h> #include <asm/current.h> @@ -475,7 +477,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb, * annotation because huge_pmd_share() does an allocation under * i_mmap_mutex. */ -struct lock_class_key hugetlbfs_i_mmap_mutex_key; +static struct lock_class_key hugetlbfs_i_mmap_mutex_key; static struct inode *hugetlbfs_get_inode(struct super_block *sb, struct inode *dir, @@ -823,8 +825,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) ps = memparse(args[0].from, &rest); pconfig->hstate = size_to_hstate(ps); if (!pconfig->hstate) { - printk(KERN_ERR - "hugetlbfs: Unsupported page size %lu MB\n", + pr_err("Unsupported page size %lu MB\n", ps >> 20); return -EINVAL; } @@ -832,8 +833,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) } default: - printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n", - p); + pr_err("Bad mount option: \"%s\"\n", p); return -EINVAL; break; } @@ -853,8 +853,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) return 0; bad_val: - printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n", - args[0].from, p); + pr_err("Bad value '%s' for mount option '%s'\n", args[0].from, p); return -EINVAL; } @@ -902,8 +901,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) goto out_free; return 0; out_free: - if (sbinfo->spool) - kfree(sbinfo->spool); + kfree(sbinfo->spool); kfree(sbinfo); return -ENOMEM; } @@ -939,7 +937,7 @@ static int get_hstate_idx(int page_size_log) return h - hstates; } -static struct dentry_operations anon_ops = { +static const struct dentry_operations anon_ops = { .d_dname = simple_dname }; @@ -970,8 +968,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, *user = current_user(); if (user_shm_lock(size, *user)) { task_lock(current); - printk_once(KERN_WARNING - "%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n", + pr_warn_once("%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n", current->comm, current->pid); task_unlock(current); } else { @@ -1031,7 +1028,7 @@ static int __init init_hugetlbfs_fs(void) int i; if (!hugepages_supported()) { - pr_info("hugetlbfs: disabling because there are no supported hugepage sizes\n"); + pr_info("disabling because there are no supported hugepage sizes\n"); return -ENOTSUPP; } @@ -1060,7 +1057,7 @@ static int __init init_hugetlbfs_fs(void) buf); if (IS_ERR(hugetlbfs_vfsmount[i])) { - pr_err("hugetlb: Cannot mount internal hugetlbfs for " + pr_err("Cannot mount internal hugetlbfs for " "page size %uK", ps_kb); error = PTR_ERR(hugetlbfs_vfsmount[i]); hugetlbfs_vfsmount[i] = NULL; diff --git a/fs/inode.c b/fs/inode.c index f96d2a6f88cc..2feb9b69f1be 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -105,7 +105,7 @@ long get_nr_dirty_inodes(void) * Handle nr_inode sysctl */ #ifdef CONFIG_SYSCTL -int proc_nr_inodes(ctl_table *table, int write, +int proc_nr_inodes(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { inodes_stat.nr_inodes = get_nr_inodes(); diff --git a/fs/ioprio.c b/fs/ioprio.c deleted file mode 100644 index e50170ca7c33..000000000000 --- a/fs/ioprio.c +++ /dev/null @@ -1,241 +0,0 @@ -/* - * fs/ioprio.c - * - * Copyright (C) 2004 Jens Axboe <axboe@kernel.dk> - * - * Helper functions for setting/querying io priorities of processes. The - * system calls closely mimmick getpriority/setpriority, see the man page for - * those. The prio argument is a composite of prio class and prio data, where - * the data argument has meaning within that class. The standard scheduling - * classes have 8 distinct prio levels, with 0 being the highest prio and 7 - * being the lowest. - * - * IOW, setting BE scheduling class with prio 2 is done ala: - * - * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2; - * - * ioprio_set(PRIO_PROCESS, pid, prio); - * - * See also Documentation/block/ioprio.txt - * - */ -#include <linux/gfp.h> -#include <linux/kernel.h> -#include <linux/export.h> -#include <linux/ioprio.h> -#include <linux/blkdev.h> -#include <linux/capability.h> -#include <linux/syscalls.h> -#include <linux/security.h> -#include <linux/pid_namespace.h> - -int set_task_ioprio(struct task_struct *task, int ioprio) -{ - int err; - struct io_context *ioc; - const struct cred *cred = current_cred(), *tcred; - - rcu_read_lock(); - tcred = __task_cred(task); - if (!uid_eq(tcred->uid, cred->euid) && - !uid_eq(tcred->uid, cred->uid) && !capable(CAP_SYS_NICE)) { - rcu_read_unlock(); - return -EPERM; - } - rcu_read_unlock(); - - err = security_task_setioprio(task, ioprio); - if (err) - return err; - - ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); - if (ioc) { - ioc->ioprio = ioprio; - put_io_context(ioc); - } - - return err; -} -EXPORT_SYMBOL_GPL(set_task_ioprio); - -SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio) -{ - int class = IOPRIO_PRIO_CLASS(ioprio); - int data = IOPRIO_PRIO_DATA(ioprio); - struct task_struct *p, *g; - struct user_struct *user; - struct pid *pgrp; - kuid_t uid; - int ret; - - switch (class) { - case IOPRIO_CLASS_RT: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - /* fall through, rt has prio field too */ - case IOPRIO_CLASS_BE: - if (data >= IOPRIO_BE_NR || data < 0) - return -EINVAL; - - break; - case IOPRIO_CLASS_IDLE: - break; - case IOPRIO_CLASS_NONE: - if (data) - return -EINVAL; - break; - default: - return -EINVAL; - } - - ret = -ESRCH; - rcu_read_lock(); - switch (which) { - case IOPRIO_WHO_PROCESS: - if (!who) - p = current; - else - p = find_task_by_vpid(who); - if (p) - ret = set_task_ioprio(p, ioprio); - break; - case IOPRIO_WHO_PGRP: - if (!who) - pgrp = task_pgrp(current); - else - pgrp = find_vpid(who); - do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { - ret = set_task_ioprio(p, ioprio); - if (ret) - break; - } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); - break; - case IOPRIO_WHO_USER: - uid = make_kuid(current_user_ns(), who); - if (!uid_valid(uid)) - break; - if (!who) - user = current_user(); - else - user = find_user(uid); - - if (!user) - break; - - do_each_thread(g, p) { - if (!uid_eq(task_uid(p), uid)) - continue; - ret = set_task_ioprio(p, ioprio); - if (ret) - goto free_uid; - } while_each_thread(g, p); -free_uid: - if (who) - free_uid(user); - break; - default: - ret = -EINVAL; - } - - rcu_read_unlock(); - return ret; -} - -static int get_task_ioprio(struct task_struct *p) -{ - int ret; - - ret = security_task_getioprio(p); - if (ret) - goto out; - ret = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, IOPRIO_NORM); - if (p->io_context) - ret = p->io_context->ioprio; -out: - return ret; -} - -int ioprio_best(unsigned short aprio, unsigned short bprio) -{ - unsigned short aclass = IOPRIO_PRIO_CLASS(aprio); - unsigned short bclass = IOPRIO_PRIO_CLASS(bprio); - - if (aclass == IOPRIO_CLASS_NONE) - aclass = IOPRIO_CLASS_BE; - if (bclass == IOPRIO_CLASS_NONE) - bclass = IOPRIO_CLASS_BE; - - if (aclass == bclass) - return min(aprio, bprio); - if (aclass > bclass) - return bprio; - else - return aprio; -} - -SYSCALL_DEFINE2(ioprio_get, int, which, int, who) -{ - struct task_struct *g, *p; - struct user_struct *user; - struct pid *pgrp; - kuid_t uid; - int ret = -ESRCH; - int tmpio; - - rcu_read_lock(); - switch (which) { - case IOPRIO_WHO_PROCESS: - if (!who) - p = current; - else - p = find_task_by_vpid(who); - if (p) - ret = get_task_ioprio(p); - break; - case IOPRIO_WHO_PGRP: - if (!who) - pgrp = task_pgrp(current); - else - pgrp = find_vpid(who); - do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { - tmpio = get_task_ioprio(p); - if (tmpio < 0) - continue; - if (ret == -ESRCH) - ret = tmpio; - else - ret = ioprio_best(ret, tmpio); - } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); - break; - case IOPRIO_WHO_USER: - uid = make_kuid(current_user_ns(), who); - if (!who) - user = current_user(); - else - user = find_user(uid); - - if (!user) - break; - - do_each_thread(g, p) { - if (!uid_eq(task_uid(p), user->uid)) - continue; - tmpio = get_task_ioprio(p); - if (tmpio < 0) - continue; - if (ret == -ESRCH) - ret = tmpio; - else - ret = ioprio_best(ret, tmpio); - } while_each_thread(g, p); - - if (who) - free_uid(user); - break; - default: - ret = -EINVAL; - } - - rcu_read_unlock(); - return ret; -} diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 5f26139a165a..6fac74349856 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -43,7 +43,7 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) clear_buffer_uptodate(bh); if (orig_bh) { clear_bit_unlock(BH_Shadow, &orig_bh->b_state); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&orig_bh->b_state, BH_Shadow); } unlock_buffer(bh); @@ -239,7 +239,7 @@ static int journal_submit_data_buffers(journal_t *journal, spin_lock(&journal->j_list_lock); J_ASSERT(jinode->i_transaction == commit_transaction); clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); } spin_unlock(&journal->j_list_lock); @@ -277,7 +277,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal, } spin_lock(&journal->j_list_lock); clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); } diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index 2b60ce1996aa..bb9cebc9ca8a 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c @@ -75,10 +75,13 @@ void jffs2_stop_garbage_collect_thread(struct jffs2_sb_info *c) static int jffs2_garbage_collect_thread(void *_c) { struct jffs2_sb_info *c = _c; + sigset_t hupmask; + siginitset(&hupmask, sigmask(SIGHUP)); allow_signal(SIGKILL); allow_signal(SIGSTOP); allow_signal(SIGCONT); + allow_signal(SIGHUP); c->gc_task = current; complete(&c->gc_thread_start); @@ -87,7 +90,7 @@ static int jffs2_garbage_collect_thread(void *_c) set_freezable(); for (;;) { - allow_signal(SIGHUP); + sigprocmask(SIG_UNBLOCK, &hupmask, NULL); again: spin_lock(&c->erase_completion_lock); if (!jffs2_thread_should_wake(c)) { @@ -95,10 +98,9 @@ static int jffs2_garbage_collect_thread(void *_c) spin_unlock(&c->erase_completion_lock); jffs2_dbg(1, "%s(): sleeping...\n", __func__); schedule(); - } else + } else { spin_unlock(&c->erase_completion_lock); - - + } /* Problem - immediately after bootup, the GCD spends a lot * of time in places like jffs2_kill_fragtree(); so much so * that userspace processes (like gdm and X) are starved @@ -150,7 +152,7 @@ static int jffs2_garbage_collect_thread(void *_c) } } /* We don't want SIGHUP to interrupt us. STOP and KILL are OK though. */ - disallow_signal(SIGHUP); + sigprocmask(SIG_BLOCK, &hupmask, NULL); jffs2_dbg(1, "%s(): pass\n", __func__); if (jffs2_garbage_collect_pass(c) == -ENOSPC) { diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 5a8ea16eedbc..0c8ca830b113 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -83,13 +83,15 @@ static int __jfs_set_acl(tid_t tid, struct inode *inode, int type, switch (type) { case ACL_TYPE_ACCESS: ea_name = POSIX_ACL_XATTR_ACCESS; - rc = posix_acl_equiv_mode(acl, &inode->i_mode); - if (rc < 0) - return rc; - inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(inode); - if (rc == 0) - acl = NULL; + if (acl) { + rc = posix_acl_equiv_mode(acl, &inode->i_mode); + if (rc < 0) + return rc; + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + if (rc == 0) + acl = NULL; + } break; case ACL_TYPE_DEFAULT: ea_name = POSIX_ACL_XATTR_DEFAULT; diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 370d7b6c5942..2d514c7affc2 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -1208,7 +1208,7 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, * by this leaf. */ l2size = - min((int)leaf[word], NLSTOL2BSZ(nwords)); + min_t(int, leaf[word], NLSTOL2BSZ(nwords)); /* determine how many words were handled. */ @@ -1902,7 +1902,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) /* determine how many blocks to allocate from this dmap. */ - nb = min(n, (s64)BPERDMAP); + nb = min_t(s64, n, BPERDMAP); /* allocate the blocks from the dmap. */ @@ -2260,7 +2260,8 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, * of bits being allocated and the l2 number * of bits currently described by this leaf. */ - size = min((int)leaf[word], NLSTOL2BSZ(nwords)); + size = min_t(int, leaf[word], + NLSTOL2BSZ(nwords)); /* update the leaf to reflect the allocation. * in addition to setting the leaf value to @@ -3563,7 +3564,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) if (mp == NULL) goto errout; - n = min(nblocks, (s64)BPERDMAP); + n = min_t(s64, nblocks, BPERDMAP); } dp = (struct dmap *) mp->data; diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 7f464c513ba0..6b0f816201a2 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -29,20 +29,20 @@ void jfs_set_inode_flags(struct inode *inode) { unsigned int flags = JFS_IP(inode)->mode2; - - inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | - S_NOATIME | S_DIRSYNC | S_SYNC); + unsigned int new_fl = 0; if (flags & JFS_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; + new_fl |= S_IMMUTABLE; if (flags & JFS_APPEND_FL) - inode->i_flags |= S_APPEND; + new_fl |= S_APPEND; if (flags & JFS_NOATIME_FL) - inode->i_flags |= S_NOATIME; + new_fl |= S_NOATIME; if (flags & JFS_DIRSYNC_FL) - inode->i_flags |= S_DIRSYNC; + new_fl |= S_DIRSYNC; if (flags & JFS_SYNC_FL) - inode->i_flags |= S_SYNC; + new_fl |= S_SYNC; + inode_set_flags(inode, new_fl, S_IMMUTABLE | S_APPEND | S_NOATIME | + S_DIRSYNC | S_SYNC); } void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip) diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 8d811e02b4b9..0acddf60af55 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -167,7 +167,7 @@ do { \ * Global list of active external journals */ static LIST_HEAD(jfs_external_logs); -static struct jfs_log *dummy_log = NULL; +static struct jfs_log *dummy_log; static DEFINE_MUTEX(jfs_log_mutex); /* diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 97f7fda51890..adf8cb045b9e 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -50,14 +50,14 @@ MODULE_DESCRIPTION("The Journaled Filesystem (JFS)"); MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM"); MODULE_LICENSE("GPL"); -static struct kmem_cache * jfs_inode_cachep; +static struct kmem_cache *jfs_inode_cachep; static const struct super_operations jfs_super_operations; static const struct export_operations jfs_export_operations; static struct file_system_type jfs_fs_type; #define MAX_COMMIT_THREADS 64 -static int commit_threads = 0; +static int commit_threads; module_param(commit_threads, int, 0); MODULE_PARM_DESC(commit_threads, "Number of commit threads"); @@ -84,8 +84,7 @@ static void jfs_handle_error(struct super_block *sb) panic("JFS (device %s): panic forced after error\n", sb->s_id); else if (sbi->flag & JFS_ERR_REMOUNT_RO) { - jfs_err("ERROR: (device %s): remounting filesystem " - "as read-only\n", + jfs_err("ERROR: (device %s): remounting filesystem as read-only\n", sb->s_id); sb->s_flags |= MS_RDONLY; } @@ -273,7 +272,10 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, case Opt_resize: { char *resize = args[0].from; - *newLVSize = simple_strtoull(resize, &resize, 0); + int rc = kstrtoll(resize, 0, newLVSize); + + if (rc) + goto cleanup; break; } case Opt_resize_nosize: @@ -327,7 +329,11 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, case Opt_uid: { char *uid = args[0].from; - uid_t val = simple_strtoul(uid, &uid, 0); + uid_t val; + int rc = kstrtouint(uid, 0, &val); + + if (rc) + goto cleanup; sbi->uid = make_kuid(current_user_ns(), val); if (!uid_valid(sbi->uid)) goto cleanup; @@ -337,7 +343,11 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, case Opt_gid: { char *gid = args[0].from; - gid_t val = simple_strtoul(gid, &gid, 0); + gid_t val; + int rc = kstrtouint(gid, 0, &val); + + if (rc) + goto cleanup; sbi->gid = make_kgid(current_user_ns(), val); if (!gid_valid(sbi->gid)) goto cleanup; @@ -347,7 +357,10 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, case Opt_umask: { char *umask = args[0].from; - sbi->umask = simple_strtoul(umask, &umask, 8); + int rc = kstrtouint(umask, 8, &sbi->umask); + + if (rc) + goto cleanup; if (sbi->umask & ~0777) { pr_err("JFS: Invalid value of umask\n"); goto cleanup; @@ -363,12 +376,10 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, * -> user has more control over the online trimming */ sbi->minblks_trim = 64; - if (blk_queue_discard(q)) { + if (blk_queue_discard(q)) *flag |= JFS_DISCARD; - } else { - pr_err("JFS: discard option " \ - "not supported on device\n"); - } + else + pr_err("JFS: discard option not supported on device\n"); break; } @@ -380,20 +391,21 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, { struct request_queue *q = bdev_get_queue(sb->s_bdev); char *minblks_trim = args[0].from; + int rc; if (blk_queue_discard(q)) { *flag |= JFS_DISCARD; - sbi->minblks_trim = simple_strtoull( - minblks_trim, &minblks_trim, 0); - } else { - pr_err("JFS: discard option " \ - "not supported on device\n"); - } + rc = kstrtouint(minblks_trim, 0, + &sbi->minblks_trim); + if (rc) + goto cleanup; + } else + pr_err("JFS: discard option not supported on device\n"); break; } default: - printk("jfs: Unrecognized mount option \"%s\" " - " or missing value\n", p); + printk("jfs: Unrecognized mount option \"%s\" or missing value\n", + p); goto cleanup; } } @@ -419,14 +431,12 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) int ret; sync_filesystem(sb); - if (!parse_options(data, sb, &newLVSize, &flag)) { + if (!parse_options(data, sb, &newLVSize, &flag)) return -EINVAL; - } if (newLVSize) { if (sb->s_flags & MS_RDONLY) { - pr_err("JFS: resize requires volume" \ - " to be mounted read-write\n"); + pr_err("JFS: resize requires volume to be mounted read-write\n"); return -EROFS; } rc = jfs_extendfs(sb, newLVSize, 0); @@ -452,9 +462,8 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) } if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) { rc = dquot_suspend(sb, -1); - if (rc < 0) { + if (rc < 0) return rc; - } rc = jfs_umount_rw(sb); JFS_SBI(sb)->flag = flag; return rc; @@ -487,7 +496,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) if (!new_valid_dev(sb->s_bdev->bd_dev)) return -EOVERFLOW; - sbi = kzalloc(sizeof (struct jfs_sb_info), GFP_KERNEL); + sbi = kzalloc(sizeof(struct jfs_sb_info), GFP_KERNEL); if (!sbi) return -ENOMEM; @@ -548,9 +557,8 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) rc = jfs_mount(sb); if (rc) { - if (!silent) { + if (!silent) jfs_err("jfs_mount failed w/return code = %d", rc); - } goto out_mount_failed; } if (sb->s_flags & MS_RDONLY) @@ -587,7 +595,8 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) * Page cache is indexed by long. * I would use MAX_LFS_FILESIZE, but it's only half as big */ - sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, (u64)sb->s_maxbytes); + sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, + (u64)sb->s_maxbytes); #endif sb->s_time_gran = 1; return 0; @@ -597,9 +606,8 @@ out_no_root: out_no_rw: rc = jfs_umount(sb); - if (rc) { + if (rc) jfs_err("jfs_umount failed with return code %d", rc); - } out_mount_failed: filemap_write_and_wait(sbi->direct_inode->i_mapping); truncate_inode_pages(sbi->direct_inode->i_mapping, 0); @@ -924,7 +932,8 @@ static int __init init_jfs_fs(void) commit_threads = MAX_COMMIT_THREADS; for (i = 0; i < commit_threads; i++) { - jfsCommitThread[i] = kthread_run(jfs_lazycommit, NULL, "jfsCommit"); + jfsCommitThread[i] = kthread_run(jfs_lazycommit, NULL, + "jfsCommit"); if (IS_ERR(jfsCommitThread[i])) { rc = PTR_ERR(jfsCommitThread[i]); jfs_err("init_jfs_fs: fork failed w/rc = %d", rc); diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index ac127cd008bf..a693f5b01ae6 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -714,6 +714,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, return ERR_PTR(-ENOMEM); ida_init(&root->ino_ida); + INIT_LIST_HEAD(&root->supers); kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO, KERNFS_DIR); diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index e01ea4a14a01..e3d37f607f97 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -14,6 +14,7 @@ #include <linux/poll.h> #include <linux/pagemap.h> #include <linux/sched.h> +#include <linux/fsnotify.h> #include "kernfs-internal.h" @@ -610,6 +611,7 @@ static void kernfs_put_open_node(struct kernfs_node *kn, static int kernfs_fop_open(struct inode *inode, struct file *file) { struct kernfs_node *kn = file->f_path.dentry->d_fsdata; + struct kernfs_root *root = kernfs_root(kn); const struct kernfs_ops *ops; struct kernfs_open_file *of; bool has_read, has_write, has_mmap; @@ -624,14 +626,16 @@ static int kernfs_fop_open(struct inode *inode, struct file *file) has_write = ops->write || ops->mmap; has_mmap = ops->mmap; - /* check perms and supported operations */ - if ((file->f_mode & FMODE_WRITE) && - (!(inode->i_mode & S_IWUGO) || !has_write)) - goto err_out; + /* see the flag definition for details */ + if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) { + if ((file->f_mode & FMODE_WRITE) && + (!(inode->i_mode & S_IWUGO) || !has_write)) + goto err_out; - if ((file->f_mode & FMODE_READ) && - (!(inode->i_mode & S_IRUGO) || !has_read)) - goto err_out; + if ((file->f_mode & FMODE_READ) && + (!(inode->i_mode & S_IRUGO) || !has_read)) + goto err_out; + } /* allocate a kernfs_open_file for the file */ error = -ENOMEM; @@ -787,20 +791,48 @@ static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait) */ void kernfs_notify(struct kernfs_node *kn) { + struct kernfs_root *root = kernfs_root(kn); struct kernfs_open_node *on; + struct kernfs_super_info *info; unsigned long flags; + if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) + return; + + /* kick poll */ spin_lock_irqsave(&kernfs_open_node_lock, flags); - if (!WARN_ON(kernfs_type(kn) != KERNFS_FILE)) { - on = kn->attr.open; - if (on) { - atomic_inc(&on->event); - wake_up_interruptible(&on->poll); - } + on = kn->attr.open; + if (on) { + atomic_inc(&on->event); + wake_up_interruptible(&on->poll); } spin_unlock_irqrestore(&kernfs_open_node_lock, flags); + + /* kick fsnotify */ + mutex_lock(&kernfs_mutex); + + list_for_each_entry(info, &root->supers, node) { + struct inode *inode; + struct dentry *dentry; + + inode = ilookup(info->sb, kn->ino); + if (!inode) + continue; + + dentry = d_find_any_alias(inode); + if (dentry) { + fsnotify_parent(NULL, dentry, FS_MODIFY); + fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE, + NULL, 0); + dput(dentry); + } + + iput(inode); + } + + mutex_unlock(&kernfs_mutex); } EXPORT_SYMBOL_GPL(kernfs_notify); diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 8be13b2a079b..dc84a3ef9ca2 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -49,6 +49,8 @@ static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn) * mount.c */ struct kernfs_super_info { + struct super_block *sb; + /* * The root associated with this super_block. Each super_block is * identified by the root and ns it's associated with. @@ -62,6 +64,9 @@ struct kernfs_super_info { * an array and compare kernfs_node tag against every entry. */ const void *ns; + + /* anchored at kernfs_root->supers, protected by kernfs_mutex */ + struct list_head node; }; #define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info)) diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 6a5f04ac8704..d171b98a6cdd 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -62,15 +62,16 @@ struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) return NULL; } -static int kernfs_fill_super(struct super_block *sb) +static int kernfs_fill_super(struct super_block *sb, unsigned long magic) { struct kernfs_super_info *info = kernfs_info(sb); struct inode *inode; struct dentry *root; + info->sb = sb; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - sb->s_magic = SYSFS_MAGIC; + sb->s_magic = magic; sb->s_op = &kernfs_sops; sb->s_time_gran = 1; @@ -131,6 +132,7 @@ const void *kernfs_super_ns(struct super_block *sb) * @fs_type: file_system_type of the fs being mounted * @flags: mount flags specified for the mount * @root: kernfs_root of the hierarchy being mounted + * @magic: file system specific magic number * @new_sb_created: tell the caller if we allocated a new superblock * @ns: optional namespace tag of the mount * @@ -142,8 +144,8 @@ const void *kernfs_super_ns(struct super_block *sb) * The return value can be passed to the vfs layer verbatim. */ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, - struct kernfs_root *root, bool *new_sb_created, - const void *ns) + struct kernfs_root *root, unsigned long magic, + bool *new_sb_created, const void *ns) { struct super_block *sb; struct kernfs_super_info *info; @@ -166,12 +168,18 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, *new_sb_created = !sb->s_root; if (!sb->s_root) { - error = kernfs_fill_super(sb); + struct kernfs_super_info *info = kernfs_info(sb); + + error = kernfs_fill_super(sb, magic); if (error) { deactivate_locked_super(sb); return ERR_PTR(error); } sb->s_flags |= MS_ACTIVE; + + mutex_lock(&kernfs_mutex); + list_add(&info->node, &root->supers); + mutex_unlock(&kernfs_mutex); } return dget(sb->s_root); @@ -190,6 +198,10 @@ void kernfs_kill_sb(struct super_block *sb) struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_node *root_kn = sb->s_root->d_fsdata; + mutex_lock(&kernfs_mutex); + list_del(&info->node); + mutex_unlock(&kernfs_mutex); + /* * Remove the superblock from fs_supers/s_instances * so we can't find it, before freeing kernfs_super_info. diff --git a/fs/libfs.c b/fs/libfs.c index a1844244246f..88e3e00e2eca 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -3,6 +3,7 @@ * Library for filesystems writers. */ +#include <linux/blkdev.h> #include <linux/export.h> #include <linux/pagemap.h> #include <linux/slab.h> @@ -923,16 +924,19 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, EXPORT_SYMBOL_GPL(generic_fh_to_parent); /** - * generic_file_fsync - generic fsync implementation for simple filesystems + * __generic_file_fsync - generic fsync implementation for simple filesystems + * * @file: file to synchronize + * @start: start offset in bytes + * @end: end offset in bytes (inclusive) * @datasync: only synchronize essential metadata if true * * This is a generic implementation of the fsync method for simple * filesystems which track all non-inode metadata in the buffers list * hanging off the address_space structure. */ -int generic_file_fsync(struct file *file, loff_t start, loff_t end, - int datasync) +int __generic_file_fsync(struct file *file, loff_t start, loff_t end, + int datasync) { struct inode *inode = file->f_mapping->host; int err; @@ -952,10 +956,34 @@ int generic_file_fsync(struct file *file, loff_t start, loff_t end, err = sync_inode_metadata(inode, 1); if (ret == 0) ret = err; + out: mutex_unlock(&inode->i_mutex); return ret; } +EXPORT_SYMBOL(__generic_file_fsync); + +/** + * generic_file_fsync - generic fsync implementation for simple filesystems + * with flush + * @file: file to synchronize + * @start: start offset in bytes + * @end: end offset in bytes (inclusive) + * @datasync: only synchronize essential metadata if true + * + */ + +int generic_file_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + struct inode *inode = file->f_mapping->host; + int err; + + err = __generic_file_fsync(file, start, end, datasync); + if (err) + return err; + return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); +} EXPORT_SYMBOL(generic_file_fsync); /** diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 6bf06a07f3e0..de051cb1f553 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -436,7 +436,7 @@ EXPORT_SYMBOL_GPL(lockd_down); * Sysctl parameters (same as module parameters, different interface). */ -static ctl_table nlm_sysctls[] = { +static struct ctl_table nlm_sysctls[] = { { .procname = "nlm_grace_period", .data = &nlm_grace_period, @@ -490,7 +490,7 @@ static ctl_table nlm_sysctls[] = { { } }; -static ctl_table nlm_sysctl_dir[] = { +static struct ctl_table nlm_sysctl_dir[] = { { .procname = "nfs", .mode = 0555, @@ -499,7 +499,7 @@ static ctl_table nlm_sysctl_dir[] = { { } }; -static ctl_table nlm_sysctl_root[] = { +static struct ctl_table nlm_sysctl_root[] = { { .procname = "fs", .mode = 0555, diff --git a/fs/locks.c b/fs/locks.c index e663aeac579e..da57c9b7e844 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -130,6 +130,9 @@ #include <linux/percpu.h> #include <linux/lglock.h> +#define CREATE_TRACE_POINTS +#include <trace/events/filelock.h> + #include <asm/uaccess.h> #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) @@ -322,6 +325,7 @@ static int flock_make_lock(struct file *filp, struct file_lock **lock, return -ENOMEM; fl->fl_file = filp; + fl->fl_owner = (fl_owner_t)filp; fl->fl_pid = current->tgid; fl->fl_flags = FL_FLOCK; fl->fl_type = type; @@ -389,18 +393,6 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, fl->fl_ops = NULL; fl->fl_lmops = NULL; - /* Ensure that fl->fl_filp has compatible f_mode */ - switch (l->l_type) { - case F_RDLCK: - if (!(filp->f_mode & FMODE_READ)) - return -EBADF; - break; - case F_WRLCK: - if (!(filp->f_mode & FMODE_WRITE)) - return -EBADF; - break; - } - return assign_type(fl, l->l_type); } @@ -439,7 +431,7 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl) if (assign_type(fl, type) != 0) return -EINVAL; - fl->fl_owner = current->files; + fl->fl_owner = (fl_owner_t)filp; fl->fl_pid = current->tgid; fl->fl_file = filp; @@ -1298,6 +1290,7 @@ static void time_out_leases(struct inode *inode) before = &inode->i_flock; while ((fl = *before) && IS_LEASE(fl) && lease_breaking(fl)) { + trace_time_out_leases(inode, fl); if (past_time(fl->fl_downgrade_time)) lease_modify(before, F_RDLCK); if (past_time(fl->fl_break_time)) @@ -1385,6 +1378,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) } if (i_have_this_lease || (mode & O_NONBLOCK)) { + trace_break_lease_noblock(inode, new_fl); error = -EWOULDBLOCK; goto out; } @@ -1396,10 +1390,12 @@ restart: if (break_time == 0) break_time++; locks_insert_block(flock, new_fl); + trace_break_lease_block(inode, new_fl); spin_unlock(&inode->i_lock); error = wait_event_interruptible_timeout(new_fl->fl_wait, !new_fl->fl_next, break_time); spin_lock(&inode->i_lock); + trace_break_lease_unblock(inode, new_fl); locks_delete_block(new_fl); if (error >= 0) { if (error == 0) @@ -1521,6 +1517,8 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp int error; lease = *flp; + trace_generic_add_lease(inode, lease); + /* * In the delegation case we need mutual exclusion with * a number of operations that take the i_mutex. We trylock @@ -1610,6 +1608,8 @@ static int generic_delete_lease(struct file *filp, struct file_lock **flp) struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; + trace_generic_delete_lease(inode, *flp); + for (before = &inode->i_flock; ((fl = *before) != NULL) && IS_LEASE(fl); before = &fl->fl_next) { @@ -2034,6 +2034,22 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd, return error; } +/* Ensure that fl->fl_filp has compatible f_mode for F_SETLK calls */ +static int +check_fmode_for_setlk(struct file_lock *fl) +{ + switch (fl->fl_type) { + case F_RDLCK: + if (!(fl->fl_file->f_mode & FMODE_READ)) + return -EBADF; + break; + case F_WRLCK: + if (!(fl->fl_file->f_mode & FMODE_WRITE)) + return -EBADF; + } + return 0; +} + /* Apply the lock described by l to an open file descriptor. * This implements both the F_SETLK and F_SETLKW commands of fcntl(). */ @@ -2071,6 +2087,10 @@ again: if (error) goto out; + error = check_fmode_for_setlk(file_lock); + if (error) + goto out; + /* * If the cmd is requesting file-private locks, then set the * FL_OFDLCK flag and override the owner. @@ -2206,6 +2226,10 @@ again: if (error) goto out; + error = check_fmode_for_setlk(file_lock); + if (error) + goto out; + /* * If the cmd is requesting file-private locks, then set the * FL_OFDLCK flag and override the owner. @@ -2304,6 +2328,7 @@ void locks_remove_file(struct file *filp) if (filp->f_op->flock) { struct file_lock fl = { + .fl_owner = (fl_owner_t)filp, .fl_pid = current->tgid, .fl_file = filp, .fl_flags = FL_FLOCK, @@ -2411,31 +2436,31 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, seq_printf(f, "%lld:%s ", id, pfx); if (IS_POSIX(fl)) { if (fl->fl_flags & FL_ACCESS) - seq_printf(f, "ACCESS"); + seq_puts(f, "ACCESS"); else if (IS_OFDLCK(fl)) - seq_printf(f, "OFDLCK"); + seq_puts(f, "OFDLCK"); else - seq_printf(f, "POSIX "); + seq_puts(f, "POSIX "); seq_printf(f, " %s ", (inode == NULL) ? "*NOINODE*" : mandatory_lock(inode) ? "MANDATORY" : "ADVISORY "); } else if (IS_FLOCK(fl)) { if (fl->fl_type & LOCK_MAND) { - seq_printf(f, "FLOCK MSNFS "); + seq_puts(f, "FLOCK MSNFS "); } else { - seq_printf(f, "FLOCK ADVISORY "); + seq_puts(f, "FLOCK ADVISORY "); } } else if (IS_LEASE(fl)) { - seq_printf(f, "LEASE "); + seq_puts(f, "LEASE "); if (lease_breaking(fl)) - seq_printf(f, "BREAKING "); + seq_puts(f, "BREAKING "); else if (fl->fl_file) - seq_printf(f, "ACTIVE "); + seq_puts(f, "ACTIVE "); else - seq_printf(f, "BREAKER "); + seq_puts(f, "BREAKER "); } else { - seq_printf(f, "UNKNOWN UNKNOWN "); + seq_puts(f, "UNKNOWN UNKNOWN "); } if (fl->fl_type & LOCK_MAND) { seq_printf(f, "%s ", @@ -2467,7 +2492,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, else seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end); } else { - seq_printf(f, "0 EOF\n"); + seq_puts(f, "0 EOF\n"); } } diff --git a/fs/mpage.c b/fs/mpage.c index 4979ffa60aaa..5f9ed622274f 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -48,23 +48,7 @@ static void mpage_end_io(struct bio *bio, int err) bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; - - if (bio_data_dir(bio) == READ) { - if (!err) { - SetPageUptodate(page); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - unlock_page(page); - } else { /* bio_data_dir(bio) == WRITE */ - if (err) { - SetPageError(page); - if (page->mapping) - set_bit(AS_EIO, &page->mapping->flags); - } - end_page_writeback(page); - } + page_endio(page, bio_data_dir(bio), err); } bio_put(bio); @@ -285,6 +269,11 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, alloc_new: if (bio == NULL) { + if (first_hole == blocks_per_page) { + if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9), + page)) + goto out; + } bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), min_t(int, nr_pages, bio_get_nr_vecs(bdev)), GFP_KERNEL); @@ -439,6 +428,35 @@ struct mpage_data { unsigned use_writepage; }; +/* + * We have our BIO, so we can now mark the buffers clean. Make + * sure to only clean buffers which we know we'll be writing. + */ +static void clean_buffers(struct page *page, unsigned first_unmapped) +{ + unsigned buffer_counter = 0; + struct buffer_head *bh, *head; + if (!page_has_buffers(page)) + return; + head = page_buffers(page); + bh = head; + + do { + if (buffer_counter++ == first_unmapped) + break; + clear_buffer_dirty(bh); + bh = bh->b_this_page; + } while (bh != head); + + /* + * we cannot drop the bh if the page is not uptodate or a concurrent + * readpage would fail to serialize with the bh and it would read from + * disk before we reach the platter. + */ + if (buffer_heads_over_limit && PageUptodate(page)) + try_to_free_buffers(page); +} + static int __mpage_writepage(struct page *page, struct writeback_control *wbc, void *data) { @@ -574,6 +592,13 @@ page_is_mapped: alloc_new: if (bio == NULL) { + if (first_unmapped == blocks_per_page) { + if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9), + page, wbc)) { + clean_buffers(page, first_unmapped); + goto out; + } + } bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), bio_get_nr_vecs(bdev), GFP_NOFS|__GFP_HIGH); if (bio == NULL) @@ -591,30 +616,7 @@ alloc_new: goto alloc_new; } - /* - * OK, we have our BIO, so we can now mark the buffers clean. Make - * sure to only clean buffers which we know we'll be writing. - */ - if (page_has_buffers(page)) { - struct buffer_head *head = page_buffers(page); - struct buffer_head *bh = head; - unsigned buffer_counter = 0; - - do { - if (buffer_counter++ == first_unmapped) - break; - clear_buffer_dirty(bh); - bh = bh->b_this_page; - } while (bh != head); - - /* - * we cannot drop the bh if the page is not uptodate - * or a concurrent readpage would fail to serialize with the bh - * and it would read from disk before we reach the platter. - */ - if (buffer_heads_over_limit && PageUptodate(page)) - try_to_free_buffers(page); - } + clean_buffers(page, first_unmapped); BUG_ON(PageWriteback(page)); set_page_writeback(page); diff --git a/fs/ncpfs/getopt.c b/fs/ncpfs/getopt.c index 03ffde1f44d6..344889cd120e 100644 --- a/fs/ncpfs/getopt.c +++ b/fs/ncpfs/getopt.c @@ -53,15 +53,14 @@ int ncp_getopt(const char *caller, char **options, const struct ncp_option *opts return -EINVAL; } if (opts->has_arg & OPT_INT) { - char* v; + int rc = kstrtoul(val, 0, value); - *value = simple_strtoul(val, &v, 0); - if (!*v) { - return opts->val; + if (rc) { + pr_info("%s: invalid numeric value in %s=%s\n", + caller, token, val); + return rc; } - pr_info("%s: invalid numeric value in %s=%s\n", - caller, token, val); - return -EDOM; + return opts->val; } if (opts->has_arg & OPT_STRING) { return opts->val; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index d9f3d067cd15..4a3d4ef76127 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2032,9 +2032,9 @@ static void nfs_access_free_entry(struct nfs_access_entry *entry) { put_rpccred(entry->cred); kfree(entry); - smp_mb__before_atomic_dec(); + smp_mb__before_atomic(); atomic_long_dec(&nfs_access_nr_entries); - smp_mb__after_atomic_dec(); + smp_mb__after_atomic(); } static void nfs_access_free_list(struct list_head *head) @@ -2082,9 +2082,9 @@ nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc) else { remove_lru_entry: list_del_init(&nfsi->access_cache_inode_lru); - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); } spin_unlock(&inode->i_lock); } @@ -2232,9 +2232,9 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) nfs_access_add_rbtree(inode, cache); /* Update accounting */ - smp_mb__before_atomic_inc(); + smp_mb__before_atomic(); atomic_long_inc(&nfs_access_nr_entries); - smp_mb__after_atomic_inc(); + smp_mb__after_atomic(); /* Add inode to global LRU list */ if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) { diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 284ca901fe16..c1edf7336315 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -916,10 +916,6 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) is_local = 1; /* We're simulating flock() locks using posix locks on the server */ - fl->fl_owner = (fl_owner_t)filp; - fl->fl_start = 0; - fl->fl_end = OFFSET_MAX; - if (fl->fl_type == F_UNLCK) return do_unlk(filp, cmd, fl, is_local); return do_setlk(filp, cmd, fl, is_local); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0c438973f3c8..e6f7398d2b3c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1085,7 +1085,7 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) trace_nfs_invalidate_mapping_exit(inode, ret); clear_bit_unlock(NFS_INO_INVALIDATING, bitlock); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(bitlock, NFS_INO_INVALIDATING); out: return ret; diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index efac602edb37..b9c61efe9660 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -789,9 +789,9 @@ static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) { - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(NFS4DS_CONNECTING, &ds->ds_state); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING); } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2349518eef2c..c0583b9bef71 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1140,9 +1140,9 @@ static int nfs4_run_state_manager(void *); static void nfs4_clear_state_manager_bit(struct nfs_client *clp) { - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING); rpc_wake_up(&clp->cl_rpcwaitq); } diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c index 2628d921b7e3..b6ebe7e445f6 100644 --- a/fs/nfs/nfs4sysctl.c +++ b/fs/nfs/nfs4sysctl.c @@ -16,7 +16,7 @@ static const int nfs_set_port_min = 0; static const int nfs_set_port_max = 65535; static struct ctl_table_header *nfs4_callback_sysctl_table; -static ctl_table nfs4_cb_sysctls[] = { +static struct ctl_table nfs4_cb_sysctls[] = { { .procname = "nfs_callback_tcpport", .data = &nfs_callback_set_tcpport, @@ -36,7 +36,7 @@ static ctl_table nfs4_cb_sysctls[] = { { } }; -static ctl_table nfs4_cb_sysctl_dir[] = { +static struct ctl_table nfs4_cb_sysctl_dir[] = { { .procname = "nfs", .mode = 0555, @@ -45,7 +45,7 @@ static ctl_table nfs4_cb_sysctl_dir[] = { { } }; -static ctl_table nfs4_cb_sysctl_root[] = { +static struct ctl_table nfs4_cb_sysctl_root[] = { { .procname = "fs", .mode = 0555, diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 2ffebf2081ce..03ed984ab4d8 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -95,7 +95,7 @@ nfs_iocounter_dec(struct nfs_io_counter *c) { if (atomic_dec_and_test(&c->io_count)) { clear_bit(NFS_IO_INPROGRESS, &c->flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&c->flags, NFS_IO_INPROGRESS); } } @@ -193,9 +193,9 @@ void nfs_unlock_request(struct nfs_page *req) printk(KERN_ERR "NFS: Invalid unlock attempted\n"); BUG(); } - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(PG_BUSY, &req->wb_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&req->wb_flags, PG_BUSY); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index cb53d450ae32..fd9536e494bc 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1810,7 +1810,7 @@ static void pnfs_clear_layoutcommitting(struct inode *inode) unsigned long *bitlock = &NFS_I(inode)->flags; clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 023793909778..c3058a076596 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -275,7 +275,7 @@ pnfs_get_lseg(struct pnfs_layout_segment *lseg) { if (lseg) { atomic_inc(&lseg->pls_refcount); - smp_mb__after_atomic_inc(); + smp_mb__after_atomic(); } return lseg; } diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index 6b3f2535a3ec..bb6ed810fa6f 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -13,7 +13,7 @@ static struct ctl_table_header *nfs_callback_sysctl_table; -static ctl_table nfs_cb_sysctls[] = { +static struct ctl_table nfs_cb_sysctls[] = { { .procname = "nfs_mountpoint_timeout", .data = &nfs_mountpoint_expiry_timeout, @@ -31,7 +31,7 @@ static ctl_table nfs_cb_sysctls[] = { { } }; -static ctl_table nfs_cb_sysctl_dir[] = { +static struct ctl_table nfs_cb_sysctl_dir[] = { { .procname = "nfs", .mode = 0555, @@ -40,7 +40,7 @@ static ctl_table nfs_cb_sysctl_dir[] = { { } }; -static ctl_table nfs_cb_sysctl_root[] = { +static struct ctl_table nfs_cb_sysctl_root[] = { { .procname = "fs", .mode = 0555, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9a3b6a4cd6b9..ffb9459f180b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -405,7 +405,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) nfs_pageio_complete(&pgio); clear_bit_unlock(NFS_INO_FLUSHING, bitlock); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(bitlock, NFS_INO_FLUSHING); if (err < 0) @@ -1458,7 +1458,7 @@ static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) static void nfs_commit_clear_lock(struct nfs_inode *nfsi) { clear_bit(NFS_INO_COMMIT, &nfsi->flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); } diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 6f3f392d48af..f66c66b9f182 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -402,8 +402,10 @@ sort_pacl(struct posix_acl *pacl) * by uid/gid. */ int i, j; - if (pacl->a_count <= 4) - return; /* no users or groups */ + /* no users or groups */ + if (!pacl || pacl->a_count <= 4) + return; + i = 1; while (pacl->a_entries[i].e_tag == ACL_USER) i++; @@ -530,13 +532,12 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) /* * ACLs with no ACEs are treated differently in the inheritable - * and effective cases: when there are no inheritable ACEs, we - * set a zero-length default posix acl: + * and effective cases: when there are no inheritable ACEs, + * calls ->set_acl with a NULL ACL structure. */ - if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) { - pacl = posix_acl_alloc(0, GFP_KERNEL); - return pacl ? pacl : ERR_PTR(-ENOMEM); - } + if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) + return NULL; + /* * When there are no effective ACEs, the following will end * up setting a 3-element effective posix ACL with all @@ -589,7 +590,7 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) add_to_mask(state, &state->groups->aces[i].perms); } - if (!state->users->n && !state->groups->n) { + if (state->users->n || state->groups->n) { pace++; pace->e_tag = ACL_MASK; low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3ba65979a3cd..9a77a5a21557 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1078,6 +1078,18 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) return NULL; } clp->cl_name.len = name.len; + INIT_LIST_HEAD(&clp->cl_sessions); + idr_init(&clp->cl_stateids); + atomic_set(&clp->cl_refcount, 0); + clp->cl_cb_state = NFSD4_CB_UNKNOWN; + INIT_LIST_HEAD(&clp->cl_idhash); + INIT_LIST_HEAD(&clp->cl_openowners); + INIT_LIST_HEAD(&clp->cl_delegations); + INIT_LIST_HEAD(&clp->cl_lru); + INIT_LIST_HEAD(&clp->cl_callbacks); + INIT_LIST_HEAD(&clp->cl_revoked); + spin_lock_init(&clp->cl_lock); + rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); return clp; } @@ -1095,6 +1107,7 @@ free_client(struct nfs4_client *clp) WARN_ON_ONCE(atomic_read(&ses->se_ref)); free_session(ses); } + rpc_destroy_wait_queue(&clp->cl_cb_waitq); free_svc_cred(&clp->cl_cred); kfree(clp->cl_name.data); idr_destroy(&clp->cl_stateids); @@ -1347,7 +1360,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, if (clp == NULL) return NULL; - INIT_LIST_HEAD(&clp->cl_sessions); ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred); if (ret) { spin_lock(&nn->client_lock); @@ -1355,20 +1367,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name, spin_unlock(&nn->client_lock); return NULL; } - idr_init(&clp->cl_stateids); - atomic_set(&clp->cl_refcount, 0); - clp->cl_cb_state = NFSD4_CB_UNKNOWN; - INIT_LIST_HEAD(&clp->cl_idhash); - INIT_LIST_HEAD(&clp->cl_openowners); - INIT_LIST_HEAD(&clp->cl_delegations); - INIT_LIST_HEAD(&clp->cl_lru); - INIT_LIST_HEAD(&clp->cl_callbacks); - INIT_LIST_HEAD(&clp->cl_revoked); - spin_lock_init(&clp->cl_lock); nfsd4_init_callback(&clp->cl_cb_null); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); - rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); copy_verf(clp, verf); rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); gen_confirm(clp); @@ -3716,9 +3717,16 @@ out: static __be32 nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp) { - if (check_for_locks(stp->st_file, lockowner(stp->st_stateowner))) + struct nfs4_lockowner *lo = lockowner(stp->st_stateowner); + + if (check_for_locks(stp->st_file, lo)) return nfserr_locks_held; - release_lock_stateid(stp); + /* + * Currently there's a 1-1 lock stateid<->lockowner + * correspondance, and we have to delete the lockowner when we + * delete the lock stateid: + */ + unhash_lockowner(lo); return nfs_ok; } @@ -4158,6 +4166,10 @@ static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, c if (!same_owner_str(&lo->lo_owner, owner, clid)) return false; + if (list_empty(&lo->lo_owner.so_stateids)) { + WARN_ON_ONCE(1); + return false; + } lst = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); return lst->st_file->fi_inode == inode; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 732648b270dc..3fdc8a3e1134 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -25,6 +25,19 @@ #define FANOTIFY_DEFAULT_MAX_MARKS 8192 #define FANOTIFY_DEFAULT_MAX_LISTENERS 128 +/* + * All flags that may be specified in parameter event_f_flags of fanotify_init. + * + * Internal and external open flags are stored together in field f_flags of + * struct file. Only external open flags shall be allowed in event_f_flags. + * Internal flags like FMODE_NONOTIFY, FMODE_EXEC, FMODE_NOCMTIME shall be + * excluded. + */ +#define FANOTIFY_INIT_ALL_EVENT_F_BITS ( \ + O_ACCMODE | O_APPEND | O_NONBLOCK | \ + __O_SYNC | O_DSYNC | O_CLOEXEC | \ + O_LARGEFILE | O_NOATIME ) + extern const struct fsnotify_ops fanotify_fsnotify_ops; static struct kmem_cache *fanotify_mark_cache __read_mostly; @@ -669,6 +682,18 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if (flags & ~FAN_ALL_INIT_FLAGS) return -EINVAL; + if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS) + return -EINVAL; + + switch (event_f_flags & O_ACCMODE) { + case O_RDONLY: + case O_RDWR: + case O_WRONLY: + break; + default: + return -EINVAL; + } + user = get_current_user(); if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) { free_uid(user); @@ -776,7 +801,10 @@ SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, case FAN_MARK_REMOVE: if (!mask) return -EINVAL; + break; case FAN_MARK_FLUSH: + if (flags & ~(FAN_MARK_MOUNT | FAN_MARK_FLUSH)) + return -EINVAL; break; default: return -EINVAL; @@ -813,6 +841,15 @@ SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, group->priority == FS_PRIO_0) goto fput_and_out; + if (flags & FAN_MARK_FLUSH) { + ret = 0; + if (flags & FAN_MARK_MOUNT) + fsnotify_clear_vfsmount_marks_by_group(group); + else + fsnotify_clear_inode_marks_by_group(group); + goto fput_and_out; + } + ret = fanotify_find_path(dfd, pathname, &path, flags); if (ret) goto fput_and_out; @@ -824,7 +861,7 @@ SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, mnt = path.mnt; /* create/update an inode mark */ - switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { + switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) { case FAN_MARK_ADD: if (flags & FAN_MARK_MOUNT) ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags); @@ -837,12 +874,6 @@ SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, else ret = fanotify_remove_inode_mark(group, inode, mask, flags); break; - case FAN_MARK_FLUSH: - if (flags & FAN_MARK_MOUNT) - fsnotify_clear_vfsmount_marks_by_group(group); - else - fsnotify_clear_inode_marks_by_group(group); - break; default: ret = -EINVAL; } diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 78a2ca3966c3..cc423a30a0c8 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -57,7 +57,7 @@ static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; static int zero; -ctl_table inotify_table[] = { +struct ctl_table inotify_table[] = { { .procname = "max_user_instances", .data = &inotify_max_user_instances, diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 923fe4a5f503..d90deaa08e78 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -340,7 +340,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark, static int fsnotify_mark_destroy(void *ignored) { struct fsnotify_mark *mark, *next; - LIST_HEAD(private_destroy_list); + struct list_head private_destroy_list; for (;;) { spin_lock(&destroy_lock); diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index a27e3fecefaf..250ed5b20c8f 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -1748,7 +1748,6 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size) if (page) { set_page_dirty(page); unlock_page(page); - mark_page_accessed(page); page_cache_release(page); } ntfs_debug("Done."); diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index ee4144ce5d7c..f82498c35e78 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -58,7 +58,7 @@ typedef enum { /** * ntfs_compression_buffer - one buffer for the decompression engine */ -static u8 *ntfs_compression_buffer = NULL; +static u8 *ntfs_compression_buffer; /** * ntfs_cb_lock - spinlock which protects ntfs_compression_buffer diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index db9bd8a31725..86ddab916b66 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2060,7 +2060,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, } do { unlock_page(pages[--do_pages]); - mark_page_accessed(pages[do_pages]); page_cache_release(pages[do_pages]); } while (do_pages); if (unlikely(status)) diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 9de2491f2926..6c3296e546c3 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -50,8 +50,8 @@ static unsigned long ntfs_nr_compression_users; /* A global default upcase table and a corresponding reference count. */ -static ntfschar *default_upcase = NULL; -static unsigned long ntfs_nr_upcase_users = 0; +static ntfschar *default_upcase; +static unsigned long ntfs_nr_upcase_users; /* Error constants/strings used in inode.c::ntfs_show_options(). */ typedef enum { diff --git a/fs/ntfs/sysctl.c b/fs/ntfs/sysctl.c index 79a89184cb5e..a503156ec15f 100644 --- a/fs/ntfs/sysctl.c +++ b/fs/ntfs/sysctl.c @@ -34,7 +34,7 @@ #include "debug.h" /* Definition of the ntfs sysctl. */ -static ctl_table ntfs_sysctls[] = { +static struct ctl_table ntfs_sysctls[] = { { .procname = "ntfs-debug", .data = &debug_msgs, /* Data pointer and size. */ @@ -46,7 +46,7 @@ static ctl_table ntfs_sysctls[] = { }; /* Define the parent directory /proc/sys/fs. */ -static ctl_table sysctls_root[] = { +static struct ctl_table sysctls_root[] = { { .procname = "fs", .mode = 0555, @@ -56,7 +56,7 @@ static ctl_table sysctls_root[] = { }; /* Storage for the sysctls header. */ -static struct ctl_table_header *sysctls_root_table = NULL; +static struct ctl_table_header *sysctls_root_table; /** * ntfs_sysctl - add or remove the debug sysctl diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index b4deb5f750d9..9d8fcf2f3b94 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6046,7 +6046,8 @@ static void ocfs2_truncate_log_worker(struct work_struct *work) void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb, int cancel) { - if (osb->osb_tl_inode) { + if (osb->osb_tl_inode && + atomic_read(&osb->osb_tl_disable) == 0) { /* We want to push off log flushes while truncates are * still running. */ if (cancel) @@ -6223,6 +6224,8 @@ void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb) int status; struct inode *tl_inode = osb->osb_tl_inode; + atomic_set(&osb->osb_tl_disable, 1); + if (tl_inode) { cancel_delayed_work(&osb->osb_truncate_log_wq); flush_workqueue(ocfs2_wq); @@ -6254,6 +6257,7 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb) * until we're sure all is well. */ INIT_DELAYED_WORK(&osb->osb_truncate_log_wq, ocfs2_truncate_log_worker); + atomic_set(&osb->osb_tl_disable, 0); osb->osb_tl_bh = tl_bh; osb->osb_tl_inode = tl_inode; diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index bf482dfed14f..73039295d0d1 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1107,7 +1107,7 @@ static int o2hb_thread(void *data) mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread running\n"); - set_user_nice(current, -20); + set_user_nice(current, MIN_NICE); /* Pin node */ o2nm_depend_this_node(); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index c6b90e670389..a68e07a9bd46 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -108,7 +108,7 @@ static struct rb_root o2net_handler_tree = RB_ROOT; static struct o2net_node o2net_nodes[O2NM_MAX_NODES]; /* XXX someday we'll need better accounting */ -static struct socket *o2net_listen_sock = NULL; +static struct socket *o2net_listen_sock; /* * listen work is only queued by the listening socket callbacks on the diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index e0517762fcc0..a106b3f2b22a 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -108,7 +108,6 @@ static inline int dlm_is_recovery_lock(const char *lock_name, int name_len) struct dlm_recovery_ctxt { struct list_head resources; - struct list_head received; struct list_head node_data; u8 new_master; u8 dead_node; diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index e33cd7a3c582..18f13c2e4a10 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -338,7 +338,7 @@ void dlm_print_one_mle(struct dlm_master_list_entry *mle) #ifdef CONFIG_DEBUG_FS -static struct dentry *dlm_debugfs_root = NULL; +static struct dentry *dlm_debugfs_root; #define DLM_DEBUGFS_DIR "o2dlm" #define DLM_DEBUGFS_DLM_STATE "dlm_state" diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index c973690dc0bc..39efc5057a36 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -959,6 +959,14 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, * domain. Set him in the map and clean up our * leftover join state. */ BUG_ON(dlm->joining_node != assert->node_idx); + + if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { + mlog(0, "dlm recovery is ongoing, disallow join\n"); + spin_unlock(&dlm->spinlock); + spin_unlock(&dlm_domain_lock); + return -EAGAIN; + } + set_bit(assert->node_idx, dlm->domain_map); clear_bit(assert->node_idx, dlm->exit_domain_map); __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); @@ -1517,6 +1525,7 @@ static int dlm_send_one_join_assert(struct dlm_ctxt *dlm, unsigned int node) { int status; + int ret; struct dlm_assert_joined assert_msg; mlog(0, "Sending join assert to node %u\n", node); @@ -1528,11 +1537,13 @@ static int dlm_send_one_join_assert(struct dlm_ctxt *dlm, status = o2net_send_message(DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, &assert_msg, sizeof(assert_msg), node, - NULL); + &ret); if (status < 0) mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " "node %u\n", status, DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, node); + else + status = ret; return status; } @@ -2023,7 +2034,6 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, INIT_LIST_HEAD(&dlm->list); INIT_LIST_HEAD(&dlm->dirty_list); INIT_LIST_HEAD(&dlm->reco.resources); - INIT_LIST_HEAD(&dlm->reco.received); INIT_LIST_HEAD(&dlm->reco.node_data); INIT_LIST_HEAD(&dlm->purge_list); INIT_LIST_HEAD(&dlm->dlm_domain_handlers); diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 5d32f7511f74..66c2a491f68d 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -52,7 +52,7 @@ #define MLOG_MASK_PREFIX ML_DLM #include "cluster/masklog.h" -static struct kmem_cache *dlm_lock_cache = NULL; +static struct kmem_cache *dlm_lock_cache; static DEFINE_SPINLOCK(dlm_cookie_lock); static u64 dlm_next_cookie = 1; diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index af3f7aa73e13..3087a21d32f9 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -82,9 +82,9 @@ static inline int dlm_mle_equal(struct dlm_ctxt *dlm, return 1; } -static struct kmem_cache *dlm_lockres_cache = NULL; -static struct kmem_cache *dlm_lockname_cache = NULL; -static struct kmem_cache *dlm_mle_cache = NULL; +static struct kmem_cache *dlm_lockres_cache; +static struct kmem_cache *dlm_lockname_cache; +static struct kmem_cache *dlm_mle_cache; static void dlm_mle_release(struct kref *kref); static void dlm_init_mle(struct dlm_master_list_entry *mle, @@ -472,11 +472,15 @@ bail: void dlm_destroy_master_caches(void) { - if (dlm_lockname_cache) + if (dlm_lockname_cache) { kmem_cache_destroy(dlm_lockname_cache); + dlm_lockname_cache = NULL; + } - if (dlm_lockres_cache) + if (dlm_lockres_cache) { kmem_cache_destroy(dlm_lockres_cache); + dlm_lockres_cache = NULL; + } } static void dlm_lockres_release(struct kref *kref) diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index fe29f7978f81..5de019437ea5 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1986,7 +1986,15 @@ skip_lvb: } if (!bad) { dlm_lock_get(newlock); - list_add_tail(&newlock->list, queue); + if (mres->flags & DLM_MRES_RECOVERY && + ml->list == DLM_CONVERTING_LIST && + newlock->ml.type > + newlock->ml.convert_type) { + /* newlock is doing downconvert, add it to the + * head of converting list */ + list_add(&newlock->list, queue); + } else + list_add_tail(&newlock->list, queue); mlog(0, "%s:%.*s: added lock for node %u, " "setting refmap bit\n", dlm->name, res->lockname.len, res->lockname.name, ml->node); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 6bd690b5a061..52cfe99ae056 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2544,11 +2544,6 @@ int ocfs2_super_lock(struct ocfs2_super *osb, * refreshed, so we do it here. Of course, making sense of * everything is up to the caller :) */ status = ocfs2_should_refresh_lock_res(lockres); - if (status < 0) { - ocfs2_cluster_unlock(osb, lockres, level); - mlog_errno(status); - goto bail; - } if (status) { status = ocfs2_refresh_slot_info(osb); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8970dcf74de5..8eb6e5732d3b 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -828,7 +828,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, /* * fs-writeback will release the dirty pages without page lock * whose offset are over inode size, the release happens at - * block_write_full_page_endio(). + * block_write_full_page(). */ i_size_write(inode, abs_to); inode->i_blocks = ocfs2_inode_sector_count(inode); diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 490229f43731..6f66b3751ace 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -143,8 +143,8 @@ bail: return status; } -int ocfs2_info_handle_blocksize(struct inode *inode, - struct ocfs2_info_request __user *req) +static int ocfs2_info_handle_blocksize(struct inode *inode, + struct ocfs2_info_request __user *req) { int status = -EFAULT; struct ocfs2_info_blocksize oib; @@ -167,8 +167,8 @@ bail: return status; } -int ocfs2_info_handle_clustersize(struct inode *inode, - struct ocfs2_info_request __user *req) +static int ocfs2_info_handle_clustersize(struct inode *inode, + struct ocfs2_info_request __user *req) { int status = -EFAULT; struct ocfs2_info_clustersize oic; @@ -192,8 +192,8 @@ bail: return status; } -int ocfs2_info_handle_maxslots(struct inode *inode, - struct ocfs2_info_request __user *req) +static int ocfs2_info_handle_maxslots(struct inode *inode, + struct ocfs2_info_request __user *req) { int status = -EFAULT; struct ocfs2_info_maxslots oim; @@ -217,8 +217,8 @@ bail: return status; } -int ocfs2_info_handle_label(struct inode *inode, - struct ocfs2_info_request __user *req) +static int ocfs2_info_handle_label(struct inode *inode, + struct ocfs2_info_request __user *req) { int status = -EFAULT; struct ocfs2_info_label oil; @@ -242,8 +242,8 @@ bail: return status; } -int ocfs2_info_handle_uuid(struct inode *inode, - struct ocfs2_info_request __user *req) +static int ocfs2_info_handle_uuid(struct inode *inode, + struct ocfs2_info_request __user *req) { int status = -EFAULT; struct ocfs2_info_uuid oiu; @@ -267,8 +267,8 @@ bail: return status; } -int ocfs2_info_handle_fs_features(struct inode *inode, - struct ocfs2_info_request __user *req) +static int ocfs2_info_handle_fs_features(struct inode *inode, + struct ocfs2_info_request __user *req) { int status = -EFAULT; struct ocfs2_info_fs_features oif; @@ -294,8 +294,8 @@ bail: return status; } -int ocfs2_info_handle_journal_size(struct inode *inode, - struct ocfs2_info_request __user *req) +static int ocfs2_info_handle_journal_size(struct inode *inode, + struct ocfs2_info_request __user *req) { int status = -EFAULT; struct ocfs2_info_journal_size oij; @@ -319,9 +319,10 @@ bail: return status; } -int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb, - struct inode *inode_alloc, u64 blkno, - struct ocfs2_info_freeinode *fi, u32 slot) +static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb, + struct inode *inode_alloc, u64 blkno, + struct ocfs2_info_freeinode *fi, + u32 slot) { int status = 0, unlock = 0; @@ -366,8 +367,8 @@ bail: return status; } -int ocfs2_info_handle_freeinode(struct inode *inode, - struct ocfs2_info_request __user *req) +static int ocfs2_info_handle_freeinode(struct inode *inode, + struct ocfs2_info_request __user *req) { u32 i; u64 blkno = -1; @@ -462,19 +463,19 @@ static void o2ffg_update_stats(struct ocfs2_info_freefrag_stats *stats, stats->ffs_free_chunks_real++; } -void ocfs2_info_update_ffg(struct ocfs2_info_freefrag *ffg, - unsigned int chunksize) +static void ocfs2_info_update_ffg(struct ocfs2_info_freefrag *ffg, + unsigned int chunksize) { o2ffg_update_histogram(&(ffg->iff_ffs.ffs_fc_hist), chunksize); o2ffg_update_stats(&(ffg->iff_ffs), chunksize); } -int ocfs2_info_freefrag_scan_chain(struct ocfs2_super *osb, - struct inode *gb_inode, - struct ocfs2_dinode *gb_dinode, - struct ocfs2_chain_rec *rec, - struct ocfs2_info_freefrag *ffg, - u32 chunks_in_group) +static int ocfs2_info_freefrag_scan_chain(struct ocfs2_super *osb, + struct inode *gb_inode, + struct ocfs2_dinode *gb_dinode, + struct ocfs2_chain_rec *rec, + struct ocfs2_info_freefrag *ffg, + u32 chunks_in_group) { int status = 0, used; u64 blkno; @@ -572,9 +573,9 @@ bail: return status; } -int ocfs2_info_freefrag_scan_bitmap(struct ocfs2_super *osb, - struct inode *gb_inode, u64 blkno, - struct ocfs2_info_freefrag *ffg) +static int ocfs2_info_freefrag_scan_bitmap(struct ocfs2_super *osb, + struct inode *gb_inode, u64 blkno, + struct ocfs2_info_freefrag *ffg) { u32 chunks_in_group; int status = 0, unlock = 0, i; @@ -652,8 +653,8 @@ bail: return status; } -int ocfs2_info_handle_freefrag(struct inode *inode, - struct ocfs2_info_request __user *req) +static int ocfs2_info_handle_freefrag(struct inode *inode, + struct ocfs2_info_request __user *req) { u64 blkno = -1; char namebuf[40]; @@ -723,8 +724,8 @@ out_err: return status; } -int ocfs2_info_handle_unknown(struct inode *inode, - struct ocfs2_info_request __user *req) +static int ocfs2_info_handle_unknown(struct inode *inode, + struct ocfs2_info_request __user *req) { int status = -EFAULT; struct ocfs2_info_request oir; @@ -752,8 +753,8 @@ bail: * - distinguish different requests. * - validate size of different requests. */ -int ocfs2_info_handle_request(struct inode *inode, - struct ocfs2_info_request __user *req) +static int ocfs2_info_handle_request(struct inode *inode, + struct ocfs2_info_request __user *req) { int status = -EFAULT; struct ocfs2_info_request oir; @@ -811,8 +812,8 @@ bail: return status; } -int ocfs2_get_request_ptr(struct ocfs2_info *info, int idx, - u64 *req_addr, int compat_flag) +static int ocfs2_get_request_ptr(struct ocfs2_info *info, int idx, + u64 *req_addr, int compat_flag) { int status = -EFAULT; u64 __user *bp = NULL; @@ -849,8 +850,8 @@ bail: * a better backward&forward compatibility, since a small piece of * request will be less likely to be broken if disk layout get changed. */ -int ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info, - int compat_flag) +static int ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info, + int compat_flag) { int i, status = 0; u64 req_addr; diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 03ea9314fecd..4b0c68849b36 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -30,6 +30,7 @@ #include <linux/kthread.h> #include <linux/time.h> #include <linux/random.h> +#include <linux/delay.h> #include <cluster/masklog.h> @@ -2185,8 +2186,20 @@ static int ocfs2_commit_thread(void *arg) || kthread_should_stop()); status = ocfs2_commit_cache(osb); - if (status < 0) - mlog_errno(status); + if (status < 0) { + static unsigned long abort_warn_time; + + /* Warn about this once per minute */ + if (printk_timed_ratelimit(&abort_warn_time, 60*HZ)) + mlog(ML_ERROR, "status = %d, journal is " + "already aborted.\n", status); + /* + * After ocfs2_commit_cache() fails, j_num_trans has a + * non-zero value. Sleep here to avoid a busy-wait + * loop. + */ + msleep_interruptible(1000); + } if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){ mlog(ML_KTHREAD, diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 8d64a97a9d5e..bbec539230fd 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -422,6 +422,7 @@ struct ocfs2_super struct inode *osb_tl_inode; struct buffer_head *osb_tl_bh; struct delayed_work osb_truncate_log_wq; + atomic_t osb_tl_disable; /* * How many clusters in our truncate log. * It must be protected by osb_tl_inode->i_mutex. diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 6ba4bcbc4796..714e53b9cc66 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -1408,10 +1408,9 @@ static void swap_refcount_rec(void *a, void *b, int size) { struct ocfs2_refcount_rec *l = a, *r = b, tmp; - tmp = *(struct ocfs2_refcount_rec *)l; - *(struct ocfs2_refcount_rec *)l = - *(struct ocfs2_refcount_rec *)r; - *(struct ocfs2_refcount_rec *)r = tmp; + tmp = *l; + *l = *r; + *r = tmp; } /* diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index 822ebc10f281..d5da6f624142 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -53,8 +53,6 @@ */ static u16 ocfs2_calc_new_backup_super(struct inode *inode, struct ocfs2_group_desc *gd, - int new_clusters, - u32 first_new_cluster, u16 cl_cpg, int set) { @@ -127,8 +125,6 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle, OCFS2_FEATURE_COMPAT_BACKUP_SB)) { backups = ocfs2_calc_new_backup_super(bm_inode, group, - new_clusters, - first_new_cluster, cl_cpg, 1); le16_add_cpu(&group->bg_free_bits_count, -1 * backups); } @@ -157,7 +153,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle, spin_lock(&OCFS2_I(bm_inode)->ip_lock); OCFS2_I(bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); - le64_add_cpu(&fe->i_size, new_clusters << osb->s_clustersize_bits); + le64_add_cpu(&fe->i_size, (u64)new_clusters << osb->s_clustersize_bits); spin_unlock(&OCFS2_I(bm_inode)->ip_lock); i_size_write(bm_inode, le64_to_cpu(fe->i_size)); @@ -167,8 +163,6 @@ out_rollback: if (ret < 0) { ocfs2_calc_new_backup_super(bm_inode, group, - new_clusters, - first_new_cluster, cl_cpg, 0); le16_add_cpu(&group->bg_free_bits_count, backups); le16_add_cpu(&group->bg_bits, -1 * num_bits); @@ -569,7 +563,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) spin_lock(&OCFS2_I(main_bm_inode)->ip_lock); OCFS2_I(main_bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); - le64_add_cpu(&fe->i_size, input->clusters << osb->s_clustersize_bits); + le64_add_cpu(&fe->i_size, (u64)input->clusters << osb->s_clustersize_bits); spin_unlock(&OCFS2_I(main_bm_inode)->ip_lock); i_size_write(main_bm_inode, le64_to_cpu(fe->i_size)); diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 83f1a665ae97..5d965e83bd43 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -709,7 +709,7 @@ static struct ctl_table ocfs2_root_table[] = { { } }; -static struct ctl_table_header *ocfs2_table_header = NULL; +static struct ctl_table_header *ocfs2_table_header; /* diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index a7cdd56f4c79..c7a89cea5c5d 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -75,7 +75,7 @@ #include "buffer_head_io.h" -static struct kmem_cache *ocfs2_inode_cachep = NULL; +static struct kmem_cache *ocfs2_inode_cachep; struct kmem_cache *ocfs2_dquot_cachep; struct kmem_cache *ocfs2_qf_chunk_cachep; @@ -85,7 +85,7 @@ struct kmem_cache *ocfs2_qf_chunk_cachep; * workqueue and schedule on our own. */ struct workqueue_struct *ocfs2_wq = NULL; -static struct dentry *ocfs2_debugfs_root = NULL; +static struct dentry *ocfs2_debugfs_root; MODULE_AUTHOR("Oracle"); MODULE_LICENSE("GPL"); @@ -2292,8 +2292,8 @@ static int ocfs2_initialize_super(struct super_block *sb, goto bail; } - strncpy(osb->vol_label, di->id2.i_super.s_label, 63); - osb->vol_label[63] = '\0'; + strlcpy(osb->vol_label, di->id2.i_super.s_label, + OCFS2_MAX_VOL_LABEL_LEN); osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno); osb->system_dir_blkno = le64_to_cpu(di->id2.i_super.s_system_dir_blkno); osb->first_cluster_group_blkno = diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index 52eaf33d346f..82e17b076ce7 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c @@ -67,7 +67,7 @@ struct ocfs2_meta_cache_item { sector_t c_block; }; -static struct kmem_cache *ocfs2_uptodate_cachep = NULL; +static struct kmem_cache *ocfs2_uptodate_cachep; u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci) { diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 442177b1119a..cfa63ee92c96 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -300,6 +300,12 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) goto done; } + if (vma->vm_ops && vma->vm_ops->name) { + name = vma->vm_ops->name(vma); + if (name) + goto done; + } + name = arch_vma_name(vma); if (!name) { pid_t tid; @@ -737,9 +743,6 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, ptent = pte_file_clear_soft_dirty(ptent); } - if (vma->vm_flags & VM_SOFTDIRTY) - vma->vm_flags &= ~VM_SOFTDIRTY; - set_pte_at(vma->vm_mm, addr, pte, ptent); #endif } @@ -807,8 +810,9 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, if (type == CLEAR_REFS_SOFT_DIRTY) { soft_dirty_cleared = true; - pr_warn_once("The pagemap bits 55-60 has changed their meaning! " - "See the linux/Documentation/vm/pagemap.txt for details.\n"); + pr_warn_once("The pagemap bits 55-60 has changed their meaning!" + " See the linux/Documentation/vm/pagemap.txt for " + "details.\n"); } task = get_proc_task(file_inode(file)); @@ -839,11 +843,17 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, * * Writing 3 to /proc/pid/clear_refs only affects file * mapped pages. + * + * Writing 4 to /proc/pid/clear_refs affects all pages. */ if (type == CLEAR_REFS_ANON && vma->vm_file) continue; if (type == CLEAR_REFS_MAPPED && !vma->vm_file) continue; + if (type == CLEAR_REFS_SOFT_DIRTY) { + if (vma->vm_flags & VM_SOFTDIRTY) + vma->vm_flags &= ~VM_SOFTDIRTY; + } walk_page_range(vma->vm_start, vma->vm_end, &clear_refs_walk); } @@ -1351,7 +1361,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, struct numa_maps *md; struct page *page; - if (pte_none(*pte)) + if (!pte_present(*pte)) return 0; page = pte_page(*pte); @@ -1408,10 +1418,10 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) seq_printf(m, "%08lx %s", vma->vm_start, buffer); if (file) { - seq_printf(m, " file="); + seq_puts(m, " file="); seq_path(m, &file->f_path, "\n\t= "); } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { - seq_printf(m, " heap"); + seq_puts(m, " heap"); } else { pid_t tid = vm_is_stack(task, vma, is_pid); if (tid != 0) { @@ -1421,14 +1431,14 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) */ if (!is_pid || (vma->vm_start <= mm->start_stack && vma->vm_end >= mm->start_stack)) - seq_printf(m, " stack"); + seq_puts(m, " stack"); else seq_printf(m, " stack:%d", tid); } } if (is_vm_hugetlb_page(vma)) - seq_printf(m, " huge"); + seq_puts(m, " huge"); walk_page_range(vma->vm_start, vma->vm_end, &walk); diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 6a8e785b29da..382aa890e228 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -42,7 +42,7 @@ static size_t elfnotes_sz; /* Total size of vmcore file. */ static u64 vmcore_size; -static struct proc_dir_entry *proc_vmcore = NULL; +static struct proc_dir_entry *proc_vmcore; /* * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 46d269e38706..0a9b72cdfeca 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -18,6 +18,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define pr_fmt(fmt) "pstore: " fmt + #include <linux/atomic.h> #include <linux/types.h> #include <linux/errno.h> @@ -224,14 +226,12 @@ static void allocate_buf_for_compression(void) zlib_inflate_workspacesize()); stream.workspace = kmalloc(size, GFP_KERNEL); if (!stream.workspace) { - pr_err("pstore: No memory for compression workspace; " - "skipping compression\n"); + pr_err("No memory for compression workspace; skipping compression\n"); kfree(big_oops_buf); big_oops_buf = NULL; } } else { - pr_err("No memory for uncompressed data; " - "skipping compression\n"); + pr_err("No memory for uncompressed data; skipping compression\n"); stream.workspace = NULL; } @@ -455,8 +455,7 @@ int pstore_register(struct pstore_info *psi) add_timer(&pstore_timer); } - pr_info("pstore: Registered %s as persistent store backend\n", - psi->name); + pr_info("Registered %s as persistent store backend\n", psi->name); return 0; } @@ -502,8 +501,8 @@ void pstore_get_records(int quiet) size = unzipped_len; compressed = false; } else { - pr_err("pstore: decompression failed;" - "returned %d\n", unzipped_len); + pr_err("decompression failed;returned %d\n", + unzipped_len); compressed = true; } } @@ -524,8 +523,8 @@ out: mutex_unlock(&psi->read_mutex); if (failed) - printk(KERN_WARNING "pstore: failed to load %d record(s) from '%s'\n", - failed, psi->name); + pr_warn("failed to load %d record(s) from '%s'\n", + failed, psi->name); } static void pstore_dowork(struct work_struct *work) diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index ff7e3d4df5a1..34a1e5aa848c 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -12,6 +12,8 @@ * */ +#define pr_fmt(fmt) "persistent_ram: " fmt + #include <linux/device.h> #include <linux/err.h> #include <linux/errno.h> @@ -205,12 +207,10 @@ static void persistent_ram_ecc_old(struct persistent_ram_zone *prz) size = buffer->data + prz->buffer_size - block; numerr = persistent_ram_decode_rs8(prz, block, size, par); if (numerr > 0) { - pr_devel("persistent_ram: error in block %p, %d\n", - block, numerr); + pr_devel("error in block %p, %d\n", block, numerr); prz->corrected_bytes += numerr; } else if (numerr < 0) { - pr_devel("persistent_ram: uncorrectable error in block %p\n", - block); + pr_devel("uncorrectable error in block %p\n", block); prz->bad_blocks++; } block += prz->ecc_info.block_size; @@ -257,7 +257,7 @@ static int persistent_ram_init_ecc(struct persistent_ram_zone *prz, prz->rs_decoder = init_rs(prz->ecc_info.symsize, prz->ecc_info.poly, 0, 1, prz->ecc_info.ecc_size); if (prz->rs_decoder == NULL) { - pr_info("persistent_ram: init_rs failed\n"); + pr_info("init_rs failed\n"); return -EINVAL; } @@ -267,10 +267,10 @@ static int persistent_ram_init_ecc(struct persistent_ram_zone *prz, numerr = persistent_ram_decode_rs8(prz, buffer, sizeof(*buffer), prz->par_header); if (numerr > 0) { - pr_info("persistent_ram: error in header, %d\n", numerr); + pr_info("error in header, %d\n", numerr); prz->corrected_bytes += numerr; } else if (numerr < 0) { - pr_info("persistent_ram: uncorrectable error in header\n"); + pr_info("uncorrectable error in header\n"); prz->bad_blocks++; } @@ -317,7 +317,7 @@ void persistent_ram_save_old(struct persistent_ram_zone *prz) prz->old_log = kmalloc(size, GFP_KERNEL); } if (!prz->old_log) { - pr_err("persistent_ram: failed to allocate buffer\n"); + pr_err("failed to allocate buffer\n"); return; } @@ -396,8 +396,8 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size) pages = kmalloc(sizeof(struct page *) * page_count, GFP_KERNEL); if (!pages) { - pr_err("%s: Failed to allocate array for %u pages\n", __func__, - page_count); + pr_err("%s: Failed to allocate array for %u pages\n", + __func__, page_count); return NULL; } @@ -462,19 +462,17 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, if (prz->buffer->sig == sig) { if (buffer_size(prz) > prz->buffer_size || buffer_start(prz) > buffer_size(prz)) - pr_info("persistent_ram: found existing invalid buffer," - " size %zu, start %zu\n", - buffer_size(prz), buffer_start(prz)); + pr_info("found existing invalid buffer, size %zu, start %zu\n", + buffer_size(prz), buffer_start(prz)); else { - pr_debug("persistent_ram: found existing buffer," - " size %zu, start %zu\n", - buffer_size(prz), buffer_start(prz)); + pr_debug("found existing buffer, size %zu, start %zu\n", + buffer_size(prz), buffer_start(prz)); persistent_ram_save_old(prz); return 0; } } else { - pr_debug("persistent_ram: no valid data in buffer" - " (sig = 0x%08x)\n", prz->buffer->sig); + pr_debug("no valid data in buffer (sig = 0x%08x)\n", + prz->buffer->sig); } prz->buffer->sig = sig; @@ -509,7 +507,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, prz = kzalloc(sizeof(struct persistent_ram_zone), GFP_KERNEL); if (!prz) { - pr_err("persistent_ram: failed to allocate persistent ram zone\n"); + pr_err("failed to allocate persistent ram zone\n"); goto err; } diff --git a/fs/readdir.c b/fs/readdir.c index 5b53d995cae6..33fd92208cb7 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -13,6 +13,7 @@ #include <linux/stat.h> #include <linux/file.h> #include <linux/fs.h> +#include <linux/fsnotify.h> #include <linux/dirent.h> #include <linux/security.h> #include <linux/syscalls.h> @@ -40,6 +41,7 @@ int iterate_dir(struct file *file, struct dir_context *ctx) ctx->pos = file->f_pos; res = file->f_op->iterate(file, ctx); file->f_pos = ctx->pos; + fsnotify_access(file); file_accessed(file); } mutex_unlock(&inode->i_mutex); diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index dc9a6829f7c6..1bcffeab713c 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -142,7 +142,6 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th, int org = *beg; BUG_ON(!th->t_trans_id); - RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of " "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1); PROC_INFO_INC(s, scan_bitmap.bmap); @@ -321,7 +320,6 @@ static int scan_bitmap(struct reiserfs_transaction_handle *th, unsigned int off_max = s->s_blocksize << 3; BUG_ON(!th->t_trans_id); - PROC_INFO_INC(s, scan_bitmap.call); if (SB_FREE_BLOCKS(s) <= 0) return 0; // No point in looking for more free blocks @@ -388,9 +386,7 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th, unsigned int nr, offset; BUG_ON(!th->t_trans_id); - PROC_INFO_INC(s, free_block); - rs = SB_DISK_SUPER_BLOCK(s); sbh = SB_BUFFER_WITH_SB(s); apbi = SB_AP_BITMAP(s); @@ -435,8 +431,8 @@ void reiserfs_free_block(struct reiserfs_transaction_handle *th, int for_unformatted) { struct super_block *s = th->t_super; - BUG_ON(!th->t_trans_id); + BUG_ON(!th->t_trans_id); RFALSE(!s, "vs-4061: trying to free block on nonexistent device"); if (!is_reusable(s, block, 1)) return; @@ -471,6 +467,7 @@ static void __discard_prealloc(struct reiserfs_transaction_handle *th, unsigned long save = ei->i_prealloc_block; int dirty = 0; struct inode *inode = &ei->vfs_inode; + BUG_ON(!th->t_trans_id); #ifdef CONFIG_REISERFS_CHECK if (ei->i_prealloc_count < 0) @@ -494,6 +491,7 @@ void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th, struct inode *inode) { struct reiserfs_inode_info *ei = REISERFS_I(inode); + BUG_ON(!th->t_trans_id); if (ei->i_prealloc_count) __discard_prealloc(th, ei); @@ -504,7 +502,6 @@ void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th) struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list; BUG_ON(!th->t_trans_id); - while (!list_empty(plist)) { struct reiserfs_inode_info *ei; ei = list_entry(plist->next, struct reiserfs_inode_info, @@ -562,7 +559,7 @@ int reiserfs_parse_alloc_options(struct super_block *s, char *options) if (!strcmp(this_char, "displacing_new_packing_localities")) { SET_OPTION(displacing_new_packing_localities); continue; - }; + } if (!strcmp(this_char, "old_hashed_relocation")) { SET_OPTION(old_hashed_relocation); @@ -729,6 +726,7 @@ void show_alloc_options(struct seq_file *seq, struct super_block *s) static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint) { char *hash_in; + if (hint->formatted_node) { hash_in = (char *)&hint->key.k_dir_id; } else { @@ -757,6 +755,7 @@ static void dirid_groups(reiserfs_blocknr_hint_t * hint) __u32 dirid = 0; int bm = 0; struct super_block *sb = hint->th->t_super; + if (hint->inode) dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id); else if (hint->formatted_node) diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index b14706a05d52..615cd9ab7940 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -228,10 +228,10 @@ const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} }; /* Maximal possible key. It is never in the tree. */ static const struct reiserfs_key MAX_KEY = { - __constant_cpu_to_le32(0xffffffff), - __constant_cpu_to_le32(0xffffffff), - {{__constant_cpu_to_le32(0xffffffff), - __constant_cpu_to_le32(0xffffffff)},} + cpu_to_le32(0xffffffff), + cpu_to_le32(0xffffffff), + {{cpu_to_le32(0xffffffff), + cpu_to_le32(0xffffffff)},} }; /* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom diff --git a/fs/splice.c b/fs/splice.c index 9bc07d2b53cf..e246954ea48c 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1537,7 +1537,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov, struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; - ssize_t count = 0; + ssize_t count; pipe = get_pipe_info(file); if (!pipe) @@ -1546,8 +1546,9 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov, ret = rw_copy_check_uvector(READ, uiov, nr_segs, ARRAY_SIZE(iovstack), iovstack, &iov); if (ret <= 0) - return ret; + goto out; + count = ret; iov_iter_init(&iter, iov, nr_segs, count, 0); sd.len = 0; @@ -1560,6 +1561,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov, ret = __splice_from_pipe(pipe, &sd, pipe_to_user); pipe_unlock(pipe); +out: if (iov != iovstack) kfree(iov); diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 9e1bb79f7e6f..887d6d270080 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -25,7 +25,7 @@ #define ERROR(s, args...) pr_err("SQUASHFS error: "s, ## args) -#define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args) +#define WARNING(s, args...) pr_warn("SQUASHFS: "s, ## args) /* block.c */ extern int squashfs_read_data(struct super_block *, u64, int, u64 *, diff --git a/fs/super.c b/fs/super.c index 48377f7463c0..d20d5b11dedf 100644 --- a/fs/super.c +++ b/fs/super.c @@ -112,9 +112,14 @@ static unsigned long super_cache_count(struct shrinker *shrink, sb = container_of(shrink, struct super_block, s_shrink); - if (!grab_super_passive(sb)) - return 0; - + /* + * Don't call grab_super_passive as it is a potential + * scalability bottleneck. The counts could get updated + * between super_cache_count and super_cache_scan anyway. + * Call to super_cache_count with shrinker_rwsem held + * ensures the safety of call to list_lru_count_node() and + * s_op->nr_cached_objects(). + */ if (sb->s_op && sb->s_op->nr_cached_objects) total_objects = sb->s_op->nr_cached_objects(sb, sc->nid); @@ -125,7 +130,6 @@ static unsigned long super_cache_count(struct shrinker *shrink, sc->nid); total_objects = vfs_pressure_ratio(total_objects); - drop_super(sb); return total_objects; } @@ -276,10 +280,8 @@ void deactivate_locked_super(struct super_block *s) struct file_system_type *fs = s->s_type; if (atomic_dec_and_test(&s->s_active)) { cleancache_invalidate_fs(s); - fs->kill_sb(s); - - /* caches are now gone, we can safely kill the shrinker now */ unregister_shrinker(&s->s_shrink); + fs->kill_sb(s); put_filesystem(fs); put_super(s); diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 28cc1acd5439..e9ef59b3abb1 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -47,12 +47,13 @@ static int sysfs_kf_seq_show(struct seq_file *sf, void *v) ssize_t count; char *buf; - /* acquire buffer and ensure that it's >= PAGE_SIZE */ + /* acquire buffer and ensure that it's >= PAGE_SIZE and clear */ count = seq_get_buf(sf, &buf); if (count < PAGE_SIZE) { seq_commit(sf, -1); return 0; } + memset(buf, 0, PAGE_SIZE); /* * Invoke show(). Control may reach here via seq file lseek even diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index aa0406895b53..7d2a860ba788 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -18,7 +18,7 @@ #include "sysfs.h" -static void remove_files(struct kernfs_node *parent, struct kobject *kobj, +static void remove_files(struct kernfs_node *parent, const struct attribute_group *grp) { struct attribute *const *attr; @@ -29,7 +29,7 @@ static void remove_files(struct kernfs_node *parent, struct kobject *kobj, kernfs_remove_by_name(parent, (*attr)->name); if (grp->bin_attrs) for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) - sysfs_remove_bin_file(kobj, *bin_attr); + kernfs_remove_by_name(parent, (*bin_attr)->attr.name); } static int create_files(struct kernfs_node *parent, struct kobject *kobj, @@ -62,7 +62,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, break; } if (error) { - remove_files(parent, kobj, grp); + remove_files(parent, grp); goto exit; } } @@ -79,7 +79,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, break; } if (error) - remove_files(parent, kobj, grp); + remove_files(parent, grp); } exit: return error; @@ -224,7 +224,7 @@ void sysfs_remove_group(struct kobject *kobj, kernfs_get(kn); } - remove_files(kn, kobj, grp); + remove_files(kn, grp); if (grp->name) kernfs_remove(kn); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index a66ad6196f59..8a49486bf30c 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -13,6 +13,7 @@ #define DEBUG #include <linux/fs.h> +#include <linux/magic.h> #include <linux/mount.h> #include <linux/init.h> #include <linux/user_namespace.h> @@ -38,7 +39,8 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, } ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); - root = kernfs_mount_ns(fs_type, flags, sysfs_root, &new_sb, ns); + root = kernfs_mount_ns(fs_type, flags, sysfs_root, + SYSFS_MAGIC, &new_sb, ns); if (IS_ERR(root) || !new_sb) kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); return root; @@ -63,7 +65,8 @@ int __init sysfs_init(void) { int err; - sysfs_root = kernfs_create_root(NULL, 0, NULL); + sysfs_root = kernfs_create_root(NULL, KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK, + NULL); if (IS_ERR(sysfs_root)) return PTR_ERR(sysfs_root); diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 4b826abb1528..45d4e96a6bac 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -460,9 +460,9 @@ static int write_cnodes(struct ubifs_info *c) * important. */ clear_bit(DIRTY_CNODE, &cnode->flags); - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(COW_CNODE, &cnode->flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); offs += len; dbg_chk_lpt_sz(c, 1, len); cnode = cnode->cnext; diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 52a6559275c4..3600994f8411 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -895,9 +895,9 @@ static int write_index(struct ubifs_info *c) * the reason for the second barrier. */ clear_bit(DIRTY_ZNODE, &znode->flags); - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(COW_ZNODE, &znode->flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); /* * We have marked the znode as clean but have not updated the diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 0ab1de4b39a5..7bc20809c99e 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -24,7 +24,7 @@ #define INVBLOCK ((u64)-1L) -static u64 ufs_add_fragments(struct inode *, u64, unsigned, unsigned, int *); +static u64 ufs_add_fragments(struct inode *, u64, unsigned, unsigned); static u64 ufs_alloc_fragments(struct inode *, unsigned, u64, unsigned, int *); static u64 ufs_alloccg_block(struct inode *, struct ufs_cg_private_info *, u64, int *); static u64 ufs_bitmap_search (struct super_block *, struct ufs_cg_private_info *, u64, unsigned); @@ -52,7 +52,7 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) if (ufs_fragnum(fragment) + count > uspi->s_fpg) ufs_error (sb, "ufs_free_fragments", "internal error"); - mutex_lock(&UFS_SB(sb)->s_lock); + lock_ufs(sb); cgno = ufs_dtog(uspi, fragment); bit = ufs_dtogd(uspi, fragment); @@ -116,12 +116,12 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); - mutex_unlock(&UFS_SB(sb)->s_lock); + unlock_ufs(sb); UFSD("EXIT\n"); return; failed: - mutex_unlock(&UFS_SB(sb)->s_lock); + unlock_ufs(sb); UFSD("EXIT (FAILED)\n"); return; } @@ -151,7 +151,7 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count) goto failed; } - mutex_lock(&UFS_SB(sb)->s_lock); + lock_ufs(sb); do_more: overflow = 0; @@ -211,12 +211,12 @@ do_more: } ufs_mark_sb_dirty(sb); - mutex_unlock(&UFS_SB(sb)->s_lock); + unlock_ufs(sb); UFSD("EXIT\n"); return; failed_unlock: - mutex_unlock(&UFS_SB(sb)->s_lock); + unlock_ufs(sb); failed: UFSD("EXIT (FAILED)\n"); return; @@ -357,7 +357,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, usb1 = ubh_get_usb_first(uspi); *err = -ENOSPC; - mutex_lock(&UFS_SB(sb)->s_lock); + lock_ufs(sb); tmp = ufs_data_ptr_to_cpu(sb, p); if (count + ufs_fragnum(fragment) > uspi->s_fpb) { @@ -378,19 +378,19 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, "fragment %llu, tmp %llu\n", (unsigned long long)fragment, (unsigned long long)tmp); - mutex_unlock(&UFS_SB(sb)->s_lock); + unlock_ufs(sb); return INVBLOCK; } if (fragment < UFS_I(inode)->i_lastfrag) { UFSD("EXIT (ALREADY ALLOCATED)\n"); - mutex_unlock(&UFS_SB(sb)->s_lock); + unlock_ufs(sb); return 0; } } else { if (tmp) { UFSD("EXIT (ALREADY ALLOCATED)\n"); - mutex_unlock(&UFS_SB(sb)->s_lock); + unlock_ufs(sb); return 0; } } @@ -399,7 +399,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, * There is not enough space for user on the device */ if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) { - mutex_unlock(&UFS_SB(sb)->s_lock); + unlock_ufs(sb); UFSD("EXIT (FAILED)\n"); return 0; } @@ -424,7 +424,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); } - mutex_unlock(&UFS_SB(sb)->s_lock); + unlock_ufs(sb); UFSD("EXIT, result %llu\n", (unsigned long long)result); return result; } @@ -432,14 +432,14 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, /* * resize block */ - result = ufs_add_fragments (inode, tmp, oldcount, newcount, err); + result = ufs_add_fragments(inode, tmp, oldcount, newcount); if (result) { *err = 0; UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, fragment + count); ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); - mutex_unlock(&UFS_SB(sb)->s_lock); + unlock_ufs(sb); UFSD("EXIT, result %llu\n", (unsigned long long)result); return result; } @@ -477,7 +477,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, *err = 0; UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, fragment + count); - mutex_unlock(&UFS_SB(sb)->s_lock); + unlock_ufs(sb); if (newcount < request) ufs_free_fragments (inode, result + newcount, request - newcount); ufs_free_fragments (inode, tmp, oldcount); @@ -485,13 +485,13 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, return result; } - mutex_unlock(&UFS_SB(sb)->s_lock); + unlock_ufs(sb); UFSD("EXIT (FAILED)\n"); return 0; } static u64 ufs_add_fragments(struct inode *inode, u64 fragment, - unsigned oldcount, unsigned newcount, int *err) + unsigned oldcount, unsigned newcount) { struct super_block * sb; struct ufs_sb_private_info * uspi; diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 98f7211599ff..a9cc75ffa925 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -69,11 +69,11 @@ void ufs_free_inode (struct inode * inode) ino = inode->i_ino; - mutex_lock(&UFS_SB(sb)->s_lock); + lock_ufs(sb); if (!((ino > 1) && (ino < (uspi->s_ncg * uspi->s_ipg )))) { ufs_warning(sb, "ufs_free_inode", "reserved inode or nonexistent inode %u\n", ino); - mutex_unlock(&UFS_SB(sb)->s_lock); + unlock_ufs(sb); return; } @@ -81,7 +81,7 @@ void ufs_free_inode (struct inode * inode) bit = ufs_inotocgoff (ino); ucpi = ufs_load_cylinder (sb, cg); if (!ucpi) { - mutex_unlock(&UFS_SB(sb)->s_lock); + unlock_ufs(sb); return; } ucg = ubh_get_ucg(UCPI_UBH(ucpi)); @@ -115,7 +115,7 @@ void ufs_free_inode (struct inode * inode) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); - mutex_unlock(&UFS_SB(sb)->s_lock); + unlock_ufs(sb); UFSD("EXIT\n"); } @@ -193,7 +193,7 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode) sbi = UFS_SB(sb); uspi = sbi->s_uspi; - mutex_lock(&sbi->s_lock); + lock_ufs(sb); /* * Try to place the inode in its parent directory @@ -328,21 +328,20 @@ cg_found: sync_dirty_buffer(bh); brelse(bh); } - - mutex_unlock(&sbi->s_lock); + unlock_ufs(sb); UFSD("allocating inode %lu\n", inode->i_ino); UFSD("EXIT\n"); return inode; fail_remove_inode: - mutex_unlock(&sbi->s_lock); + unlock_ufs(sb); clear_nlink(inode); iput(inode); UFSD("EXIT (FAILED): err %d\n", err); return ERR_PTR(err); failed: - mutex_unlock(&sbi->s_lock); + unlock_ufs(sb); make_bad_inode(inode); iput (inode); UFSD("EXIT (FAILED): err %d\n", err); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index c1183f9f69dc..b879f1ba3439 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -697,7 +697,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait) unsigned flags; lock_ufs(sb); - mutex_lock(&UFS_SB(sb)->s_lock); UFSD("ENTER\n"); @@ -715,7 +714,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait) ufs_put_cstotal(sb); UFSD("EXIT\n"); - mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return 0; @@ -760,6 +758,7 @@ static void ufs_put_super(struct super_block *sb) ubh_brelse_uspi (sbi->s_uspi); kfree (sbi->s_uspi); + mutex_destroy(&sbi->mutex); kfree (sbi); sb->s_fs_info = NULL; UFSD("EXIT\n"); @@ -786,6 +785,14 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) flags = 0; UFSD("ENTER\n"); + +#ifndef CONFIG_UFS_FS_WRITE + if (!(sb->s_flags & MS_RDONLY)) { + printk("ufs was compiled with read-only support, " + "can't be mounted as read-write\n"); + return -EROFS; + } +#endif sbi = kzalloc(sizeof(struct ufs_sb_info), GFP_KERNEL); if (!sbi) @@ -795,15 +802,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY)); -#ifndef CONFIG_UFS_FS_WRITE - if (!(sb->s_flags & MS_RDONLY)) { - printk("ufs was compiled with read-only support, " - "can't be mounted as read-write\n"); - goto failed; - } -#endif mutex_init(&sbi->mutex); - mutex_init(&sbi->s_lock); spin_lock_init(&sbi->work_lock); INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs); /* @@ -1257,6 +1256,7 @@ magic_found: return 0; failed: + mutex_destroy(&sbi->mutex); if (ubh) ubh_brelse_uspi (uspi); kfree (uspi); @@ -1280,7 +1280,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) sync_filesystem(sb); lock_ufs(sb); - mutex_lock(&UFS_SB(sb)->s_lock); uspi = UFS_SB(sb)->s_uspi; flags = UFS_SB(sb)->s_flags; usb1 = ubh_get_usb_first(uspi); @@ -1294,7 +1293,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) new_mount_opt = 0; ufs_set_opt (new_mount_opt, ONERROR_LOCK); if (!ufs_parse_options (data, &new_mount_opt)) { - mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; } @@ -1302,14 +1300,12 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) new_mount_opt |= ufstype; } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { printk("ufstype can't be changed during remount\n"); - mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; } if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { UFS_SB(sb)->s_mount_opt = new_mount_opt; - mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return 0; } @@ -1334,7 +1330,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) #ifndef CONFIG_UFS_FS_WRITE printk("ufs was compiled with read-only support, " "can't be mounted as read-write\n"); - mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; #else @@ -1344,13 +1339,11 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ufstype != UFS_MOUNT_UFSTYPE_SUNx86 && ufstype != UFS_MOUNT_UFSTYPE_UFS2) { printk("this ufstype is read-only supported\n"); - mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; } if (!ufs_read_cylinder_structures(sb)) { printk("failed during remounting\n"); - mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EPERM; } @@ -1358,7 +1351,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) #endif } UFS_SB(sb)->s_mount_opt = new_mount_opt; - mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return 0; } diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index ff2c15ab81aa..343e6fc571e5 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -24,7 +24,6 @@ struct ufs_sb_info { int work_queued; /* non-zero if the delayed work is queued */ struct delayed_work sync_work; /* FS sync delayed work */ spinlock_t work_lock; /* protects sync_work and work_queued */ - struct mutex s_lock; }; struct ufs_inode_info { diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 1399e187d425..753e467aa1a5 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -237,7 +237,7 @@ xfs_fs_nfs_commit_metadata( if (!lsn) return 0; - return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); + return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); } const struct export_operations xfs_export_operations = { diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 951a2321ee01..830c1c937b88 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -155,7 +155,7 @@ xfs_dir_fsync( if (!lsn) return 0; - return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); + return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); } STATIC int @@ -295,7 +295,7 @@ xfs_file_aio_read( xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); if (inode->i_mapping->nrpages) { - ret = -filemap_write_and_wait_range( + ret = filemap_write_and_wait_range( VFS_I(ip)->i_mapping, pos, -1); if (ret) { @@ -837,7 +837,7 @@ xfs_file_fallocate( unsigned blksize_mask = (1 << inode->i_blkbits) - 1; if (offset & blksize_mask || len & blksize_mask) { - error = -EINVAL; + error = EINVAL; goto out_unlock; } @@ -846,7 +846,7 @@ xfs_file_fallocate( * in which case it is effectively a truncate operation */ if (offset + len >= i_size_read(inode)) { - error = -EINVAL; + error = EINVAL; goto out_unlock; } diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 301ecbfcc0be..36d630319a27 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -72,8 +72,8 @@ xfs_initxattrs( int error = 0; for (xattr = xattr_array; xattr->name != NULL; xattr++) { - error = xfs_attr_set(ip, xattr->name, xattr->value, - xattr->value_len, ATTR_SECURE); + error = -xfs_attr_set(ip, xattr->name, xattr->value, + xattr->value_len, ATTR_SECURE); if (error < 0) break; } @@ -93,8 +93,8 @@ xfs_init_security( struct inode *dir, const struct qstr *qstr) { - return security_inode_init_security(inode, dir, qstr, - &xfs_initxattrs, NULL); + return -security_inode_init_security(inode, dir, qstr, + &xfs_initxattrs, NULL); } static void @@ -173,12 +173,12 @@ xfs_generic_create( #ifdef CONFIG_XFS_POSIX_ACL if (default_acl) { - error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + error = -xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); if (error) goto out_cleanup_inode; } if (acl) { - error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); + error = -xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); if (error) goto out_cleanup_inode; } diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 348e4d2ed6e6..dc977b6e6a36 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -843,22 +843,17 @@ xfs_qm_init_quotainfo( qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); - if ((error = list_lru_init(&qinf->qi_lru))) { - kmem_free(qinf); - mp->m_quotainfo = NULL; - return error; - } + error = -list_lru_init(&qinf->qi_lru); + if (error) + goto out_free_qinf; /* * See if quotainodes are setup, and if not, allocate them, * and change the superblock accordingly. */ - if ((error = xfs_qm_init_quotainos(mp))) { - list_lru_destroy(&qinf->qi_lru); - kmem_free(qinf); - mp->m_quotainfo = NULL; - return error; - } + error = xfs_qm_init_quotainos(mp); + if (error) + goto out_free_lru; INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS); INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS); @@ -918,7 +913,7 @@ xfs_qm_init_quotainfo( qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit); qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); - + xfs_qm_dqdestroy(dqp); } else { qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; @@ -935,6 +930,13 @@ xfs_qm_init_quotainfo( qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE; register_shrinker(&qinf->qi_shrinker); return 0; + +out_free_lru: + list_lru_destroy(&qinf->qi_lru); +out_free_qinf: + kmem_free(qinf); + mp->m_quotainfo = NULL; + return error; } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 205376776377..3494eff8e4eb 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1433,11 +1433,11 @@ xfs_fs_fill_super( if (error) goto out_free_fsname; - error = xfs_init_mount_workqueues(mp); + error = -xfs_init_mount_workqueues(mp); if (error) goto out_close_devices; - error = xfs_icsb_init_counters(mp); + error = -xfs_icsb_init_counters(mp); if (error) goto out_destroy_workqueues; |