diff options
Diffstat (limited to 'fs')
73 files changed, 1021 insertions, 632 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 14a86448572c..69357c0d9899 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -188,7 +188,8 @@ static void v9fs_kill_super(struct super_block *s) P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s); - v9fs_dentry_release(s->s_root); /* clunk root */ + if (s->s_root) + v9fs_dentry_release(s->s_root); /* clunk root */ kill_anon_super(s); diff --git a/fs/affs/affs.h b/fs/affs/affs.h index e511dc621a2e..0e40caaba456 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -106,8 +106,8 @@ struct affs_sb_info { u32 s_last_bmap; struct buffer_head *s_bmap_bh; char *s_prefix; /* Prefix for volumes and assigns. */ - int s_prefix_len; /* Length of prefix. */ char s_volume[32]; /* Volume prefix for absolute symlinks. */ + spinlock_t symlink_lock; /* protects the previous two */ }; #define SF_INTL 0x0001 /* International filesystem. */ diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 960d336ec694..d70bbbac6b7b 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -341,10 +341,13 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) p = (char *)AFFS_HEAD(bh)->table; lc = '/'; if (*symname == '/') { + struct affs_sb_info *sbi = AFFS_SB(sb); while (*symname == '/') symname++; - while (AFFS_SB(sb)->s_volume[i]) /* Cannot overflow */ - *p++ = AFFS_SB(sb)->s_volume[i++]; + spin_lock(&sbi->symlink_lock); + while (sbi->s_volume[i]) /* Cannot overflow */ + *p++ = sbi->s_volume[i++]; + spin_unlock(&sbi->symlink_lock); } while (i < maxlen && (c = *symname++)) { if (c == '.' && lc == '/' && *symname == '.' && symname[1] == '/') { diff --git a/fs/affs/super.c b/fs/affs/super.c index 104fdcb3a7fc..d41e9673cd97 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -203,7 +203,7 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s switch (token) { case Opt_bs: if (match_int(&args[0], &n)) - return -EINVAL; + return 0; if (n != 512 && n != 1024 && n != 2048 && n != 4096) { printk ("AFFS: Invalid blocksize (512, 1024, 2048, 4096 allowed)\n"); @@ -213,7 +213,7 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s break; case Opt_mode: if (match_octal(&args[0], &option)) - return 1; + return 0; *mode = option & 0777; *mount_opts |= SF_SETMODE; break; @@ -221,8 +221,6 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s *mount_opts |= SF_MUFS; break; case Opt_prefix: - /* Free any previous prefix */ - kfree(*prefix); *prefix = match_strdup(&args[0]); if (!*prefix) return 0; @@ -233,21 +231,21 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s break; case Opt_reserved: if (match_int(&args[0], reserved)) - return 1; + return 0; break; case Opt_root: if (match_int(&args[0], root)) - return 1; + return 0; break; case Opt_setgid: if (match_int(&args[0], &option)) - return 1; + return 0; *gid = option; *mount_opts |= SF_SETGID; break; case Opt_setuid: if (match_int(&args[0], &option)) - return -EINVAL; + return 0; *uid = option; *mount_opts |= SF_SETUID; break; @@ -311,11 +309,14 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) return -ENOMEM; sb->s_fs_info = sbi; mutex_init(&sbi->s_bmlock); + spin_lock_init(&sbi->symlink_lock); if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block, &blocksize,&sbi->s_prefix, sbi->s_volume, &mount_flags)) { printk(KERN_ERR "AFFS: Error parsing options\n"); + kfree(sbi->s_prefix); + kfree(sbi); return -EINVAL; } /* N.B. after this point s_prefix must be released */ @@ -516,14 +517,18 @@ affs_remount(struct super_block *sb, int *flags, char *data) unsigned long mount_flags; int res = 0; char *new_opts = kstrdup(data, GFP_KERNEL); + char volume[32]; + char *prefix = NULL; pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data); *flags |= MS_NODIRATIME; + memcpy(volume, sbi->s_volume, 32); if (!parse_options(data, &uid, &gid, &mode, &reserved, &root_block, - &blocksize, &sbi->s_prefix, sbi->s_volume, + &blocksize, &prefix, volume, &mount_flags)) { + kfree(prefix); kfree(new_opts); return -EINVAL; } @@ -534,6 +539,14 @@ affs_remount(struct super_block *sb, int *flags, char *data) sbi->s_mode = mode; sbi->s_uid = uid; sbi->s_gid = gid; + /* protect against readers */ + spin_lock(&sbi->symlink_lock); + if (prefix) { + kfree(sbi->s_prefix); + sbi->s_prefix = prefix; + } + memcpy(sbi->s_volume, volume, 32); + spin_unlock(&sbi->symlink_lock); if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { unlock_kernel(); diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c index 41782539c907..ee00f08c4f53 100644 --- a/fs/affs/symlink.c +++ b/fs/affs/symlink.c @@ -20,7 +20,6 @@ static int affs_symlink_readpage(struct file *file, struct page *page) int i, j; char c; char lc; - char *pf; pr_debug("AFFS: follow_link(ino=%lu)\n",inode->i_ino); @@ -32,11 +31,15 @@ static int affs_symlink_readpage(struct file *file, struct page *page) j = 0; lf = (struct slink_front *)bh->b_data; lc = 0; - pf = AFFS_SB(inode->i_sb)->s_prefix ? AFFS_SB(inode->i_sb)->s_prefix : "/"; if (strchr(lf->symname,':')) { /* Handle assign or volume name */ + struct affs_sb_info *sbi = AFFS_SB(inode->i_sb); + char *pf; + spin_lock(&sbi->symlink_lock); + pf = sbi->s_prefix ? sbi->s_prefix : "/"; while (i < 1023 && (c = pf[i])) link[i++] = c; + spin_unlock(&sbi->symlink_lock); while (i < 1023 && lf->symname[j] != ':') link[i++] = lf->symname[j++]; if (i < 1023) diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 33baf27fac78..34ddda888e63 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -873,6 +873,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) brelse(bh); unacquire_priv_sbp: + kfree(befs_sb->mount_opts.iocharset); kfree(sb->s_fs_info); unacquire_none: diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 6f60336c6628..8f3d9fd89604 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -353,35 +353,35 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) struct inode *inode; unsigned i, imap_len; struct bfs_sb_info *info; - long ret = -EINVAL; + int ret = -EINVAL; unsigned long i_sblock, i_eblock, i_eoff, s_size; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; + mutex_init(&info->bfs_lock); s->s_fs_info = info; sb_set_blocksize(s, BFS_BSIZE); - bh = sb_bread(s, 0); - if(!bh) + info->si_sbh = sb_bread(s, 0); + if (!info->si_sbh) goto out; - bfs_sb = (struct bfs_super_block *)bh->b_data; + bfs_sb = (struct bfs_super_block *)info->si_sbh->b_data; if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) { if (!silent) printf("No BFS filesystem on %s (magic=%08x)\n", s->s_id, le32_to_cpu(bfs_sb->s_magic)); - goto out; + goto out1; } if (BFS_UNCLEAN(bfs_sb, s) && !silent) printf("%s is unclean, continuing\n", s->s_id); s->s_magic = BFS_MAGIC; - info->si_sbh = bh; if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) { printf("Superblock is corrupted\n"); - goto out; + goto out1; } info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) / @@ -390,7 +390,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) imap_len = (info->si_lasti / 8) + 1; info->si_imap = kzalloc(imap_len, GFP_KERNEL); if (!info->si_imap) - goto out; + goto out1; for (i = 0; i < BFS_ROOT_INO; i++) set_bit(i, info->si_imap); @@ -398,15 +398,13 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) inode = bfs_iget(s, BFS_ROOT_INO); if (IS_ERR(inode)) { ret = PTR_ERR(inode); - kfree(info->si_imap); - goto out; + goto out2; } s->s_root = d_alloc_root(inode); if (!s->s_root) { iput(inode); ret = -ENOMEM; - kfree(info->si_imap); - goto out; + goto out2; } info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1) >> BFS_BSIZE_BITS; @@ -419,10 +417,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) bh = sb_bread(s, info->si_blocks - 1); if (!bh) { printf("Last block not available: %lu\n", info->si_blocks - 1); - iput(inode); ret = -EIO; - kfree(info->si_imap); - goto out; + goto out3; } brelse(bh); @@ -459,11 +455,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) printf("Inode 0x%08x corrupted\n", i); brelse(bh); - s->s_root = NULL; - kfree(info->si_imap); - kfree(info); - s->s_fs_info = NULL; - return -EIO; + ret = -EIO; + goto out3; } if (!di->i_ino) { @@ -483,11 +476,17 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) s->s_dirt = 1; } dump_imap("read_super", s); - mutex_init(&info->bfs_lock); return 0; +out3: + dput(s->s_root); + s->s_root = NULL; +out2: + kfree(info->si_imap); +out1: + brelse(info->si_sbh); out: - brelse(bh); + mutex_destroy(&info->bfs_lock); kfree(info); s->s_fs_info = NULL; return ret; diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index b639dcf7c778..0133b5a30413 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -263,6 +263,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) #else set_personality(PER_LINUX); #endif + setup_new_exec(bprm); current->mm->end_code = ex.a_text + (current->mm->start_code = N_TXTADDR(ex)); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index b9b3bb51b1e4..1ed37ba72343 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -662,27 +662,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0') goto out_free_interp; - /* - * The early SET_PERSONALITY here is so that the lookup - * for the interpreter happens in the namespace of the - * to-be-execed image. SET_PERSONALITY can select an - * alternate root. - * - * However, SET_PERSONALITY is NOT allowed to switch - * this task into the new images's memory mapping - * policy - that is, TASK_SIZE must still evaluate to - * that which is appropriate to the execing application. - * This is because exit_mmap() needs to have TASK_SIZE - * evaluate to the size of the old image. - * - * So if (say) a 64-bit application is execing a 32-bit - * application it is the architecture's responsibility - * to defer changing the value of TASK_SIZE until the - * switch really is going to happen - do this in - * flush_thread(). - akpm - */ - SET_PERSONALITY(loc->elf_ex); - interpreter = open_exec(elf_interpreter); retval = PTR_ERR(interpreter); if (IS_ERR(interpreter)) @@ -730,9 +709,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* Verify the interpreter has a valid arch */ if (!elf_check_arch(&loc->interp_elf_ex)) goto out_free_dentry; - } else { - /* Executables without an interpreter also need a personality */ - SET_PERSONALITY(loc->elf_ex); } /* Flush all traces of the currently running executable */ @@ -752,7 +728,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) current->flags |= PF_RANDOMIZE; - arch_pick_mmap_layout(current->mm); + + setup_new_exec(bprm); /* Do this so that we can load the interpreter, if need be. We will change some of these later */ diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 38502c67987c..e7a0bb47252c 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -171,6 +171,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, #ifdef ELF_FDPIC_PLAT_INIT unsigned long dynaddr; #endif +#ifndef CONFIG_MMU + unsigned long stack_prot; +#endif struct file *interpreter = NULL; /* to shut gcc up */ char *interpreter_name = NULL; int executable_stack; @@ -316,6 +319,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, * defunct, deceased, etc. after this point we have to exit via * error_kill */ set_personality(PER_LINUX_FDPIC); + if (elf_read_implies_exec(&exec_params.hdr, executable_stack)) + current->personality |= READ_IMPLIES_EXEC; + + setup_new_exec(bprm); + set_binfmt(&elf_fdpic_format); current->mm->start_code = 0; @@ -377,9 +385,13 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, if (stack_size < PAGE_SIZE * 2) stack_size = PAGE_SIZE * 2; + stack_prot = PROT_READ | PROT_WRITE; + if (executable_stack == EXSTACK_ENABLE_X || + (executable_stack == EXSTACK_DEFAULT && VM_STACK_FLAGS & VM_EXEC)) + stack_prot |= PROT_EXEC; + down_write(¤t->mm->mmap_sem); - current->mm->start_brk = do_mmap(NULL, 0, stack_size, - PROT_READ | PROT_WRITE | PROT_EXEC, + current->mm->start_brk = do_mmap(NULL, 0, stack_size, stack_prot, MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, 0); diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index a2796651e756..ca88c4687b84 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -519,6 +519,7 @@ static int load_flat_file(struct linux_binprm * bprm, /* OK, This is the point of no return */ set_personality(PER_LINUX_32BIT); + setup_new_exec(bprm); } /* diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index eff74b9c9e77..35cf0023ddb8 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -227,6 +227,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) /* OK, This is the point of no return */ current->flags &= ~PF_FORKNOEXEC; current->personality = PER_HPUX; + setup_new_exec(bprm); /* Set the task size for HP-UX processes such that * the gateway page is outside the address space. diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 49a34e7f7306..a16f29e888cd 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -61,7 +61,7 @@ static inline unsigned int vecs_to_idx(unsigned int nr) static inline int use_bip_pool(unsigned int idx) { - if (idx == BIOVEC_NR_POOLS) + if (idx == BIOVEC_MAX_IDX) return 1; return 0; @@ -95,6 +95,7 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, /* Use mempool if lower order alloc failed or max vecs were requested */ if (bip == NULL) { + idx = BIOVEC_MAX_IDX; /* so we free the payload properly later */ bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); if (unlikely(bip == NULL)) { @@ -542,13 +542,18 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page if (page == prev->bv_page && offset == prev->bv_offset + prev->bv_len) { + unsigned int prev_bv_len = prev->bv_len; prev->bv_len += len; if (q->merge_bvec_fn) { struct bvec_merge_data bvm = { + /* prev_bvec is already charged in + bi_size, discharge it in order to + simulate merging updated prev_bvec + as new bvec. */ .bi_bdev = bio->bi_bdev, .bi_sector = bio->bi_sector, - .bi_size = bio->bi_size, + .bi_size = bio->bi_size - prev_bv_len, .bi_rw = bio->bi_rw, }; diff --git a/fs/block_dev.c b/fs/block_dev.c index 8bed0557d88c..34e2d2033765 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -246,7 +246,8 @@ struct super_block *freeze_bdev(struct block_device *bdev) if (!sb) goto out; if (sb->s_flags & MS_RDONLY) { - deactivate_locked_super(sb); + sb->s_frozen = SB_FREEZE_TRANS; + up_write(&sb->s_umount); mutex_unlock(&bdev->bd_fsfreeze_mutex); return sb; } @@ -307,7 +308,7 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb) BUG_ON(sb->s_bdev != bdev); down_write(&sb->s_umount); if (sb->s_flags & MS_RDONLY) - goto out_deactivate; + goto out_unfrozen; if (sb->s_op->unfreeze_fs) { error = sb->s_op->unfreeze_fs(sb); @@ -321,11 +322,11 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb) } } +out_unfrozen: sb->s_frozen = SB_UNFROZEN; smp_wmb(); wake_up(&sb->s_wait_unfrozen); -out_deactivate: if (sb) deactivate_locked_super(sb); out_unlock: diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 63ea83ff687f..3bbcaa716b3c 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2287,12 +2287,12 @@ int cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, char *mount_data_global, const char *devname) { - int rc = 0; + int rc; int xid; struct smb_vol *volume_info; - struct cifsSesInfo *pSesInfo = NULL; - struct cifsTconInfo *tcon = NULL; - struct TCP_Server_Info *srvTcp = NULL; + struct cifsSesInfo *pSesInfo; + struct cifsTconInfo *tcon; + struct TCP_Server_Info *srvTcp; char *full_path; char *mount_data = mount_data_global; #ifdef CONFIG_CIFS_DFS_UPCALL @@ -2301,6 +2301,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, int referral_walks_count = 0; try_mount_again: #endif + rc = 0; + tcon = NULL; + pSesInfo = NULL; + srvTcp = NULL; full_path = NULL; xid = GetXid(); @@ -2597,6 +2601,7 @@ remote_path_check: cleanup_volume_info(&volume_info); referral_walks_count++; + FreeXid(xid); goto try_mount_again; } #else /* No DFS support, return error on mount */ diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index f84062f9a985..f5618f8cc462 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -666,6 +666,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst, min(len, max_len), nlt, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + pqst->len -= nls_nullsize(nlt); } else { pqst->name = filename; pqst->len = len; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index d22438ef7674..39c6ee868d7b 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -32,7 +32,9 @@ static struct vfsmount *debugfs_mount; static int debugfs_mount_count; static bool debugfs_registered; -static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t dev) +static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t dev, + void *data, const struct file_operations *fops) + { struct inode *inode = new_inode(sb); @@ -44,14 +46,18 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d init_special_inode(inode, mode, dev); break; case S_IFREG: - inode->i_fop = &debugfs_file_operations; + inode->i_fop = fops ? fops : &debugfs_file_operations; + inode->i_private = data; break; case S_IFLNK: inode->i_op = &debugfs_link_operations; + inode->i_fop = fops; + inode->i_private = data; break; case S_IFDIR: inode->i_op = &simple_dir_inode_operations; - inode->i_fop = &simple_dir_operations; + inode->i_fop = fops ? fops : &simple_dir_operations; + inode->i_private = data; /* directory inodes start off with i_nlink == 2 * (for "." entry) */ @@ -64,7 +70,8 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d /* SMP-safe */ static int debugfs_mknod(struct inode *dir, struct dentry *dentry, - int mode, dev_t dev) + int mode, dev_t dev, void *data, + const struct file_operations *fops) { struct inode *inode; int error = -EPERM; @@ -72,7 +79,7 @@ static int debugfs_mknod(struct inode *dir, struct dentry *dentry, if (dentry->d_inode) return -EEXIST; - inode = debugfs_get_inode(dir->i_sb, mode, dev); + inode = debugfs_get_inode(dir->i_sb, mode, dev, data, fops); if (inode) { d_instantiate(dentry, inode); dget(dentry); @@ -81,12 +88,13 @@ static int debugfs_mknod(struct inode *dir, struct dentry *dentry, return error; } -static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode, + void *data, const struct file_operations *fops) { int res; mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR; - res = debugfs_mknod(dir, dentry, mode, 0); + res = debugfs_mknod(dir, dentry, mode, 0, data, fops); if (!res) { inc_nlink(dir); fsnotify_mkdir(dir, dentry); @@ -94,18 +102,20 @@ static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) return res; } -static int debugfs_link(struct inode *dir, struct dentry *dentry, int mode) +static int debugfs_link(struct inode *dir, struct dentry *dentry, int mode, + void *data, const struct file_operations *fops) { mode = (mode & S_IALLUGO) | S_IFLNK; - return debugfs_mknod(dir, dentry, mode, 0); + return debugfs_mknod(dir, dentry, mode, 0, data, fops); } -static int debugfs_create(struct inode *dir, struct dentry *dentry, int mode) +static int debugfs_create(struct inode *dir, struct dentry *dentry, int mode, + void *data, const struct file_operations *fops) { int res; mode = (mode & S_IALLUGO) | S_IFREG; - res = debugfs_mknod(dir, dentry, mode, 0); + res = debugfs_mknod(dir, dentry, mode, 0, data, fops); if (!res) fsnotify_create(dir, dentry); return res; @@ -139,7 +149,9 @@ static struct file_system_type debug_fs_type = { static int debugfs_create_by_name(const char *name, mode_t mode, struct dentry *parent, - struct dentry **dentry) + struct dentry **dentry, + void *data, + const struct file_operations *fops) { int error = 0; @@ -164,13 +176,16 @@ static int debugfs_create_by_name(const char *name, mode_t mode, if (!IS_ERR(*dentry)) { switch (mode & S_IFMT) { case S_IFDIR: - error = debugfs_mkdir(parent->d_inode, *dentry, mode); + error = debugfs_mkdir(parent->d_inode, *dentry, mode, + data, fops); break; case S_IFLNK: - error = debugfs_link(parent->d_inode, *dentry, mode); + error = debugfs_link(parent->d_inode, *dentry, mode, + data, fops); break; default: - error = debugfs_create(parent->d_inode, *dentry, mode); + error = debugfs_create(parent->d_inode, *dentry, mode, + data, fops); break; } dput(*dentry); @@ -221,19 +236,13 @@ struct dentry *debugfs_create_file(const char *name, mode_t mode, if (error) goto exit; - error = debugfs_create_by_name(name, mode, parent, &dentry); + error = debugfs_create_by_name(name, mode, parent, &dentry, + data, fops); if (error) { dentry = NULL; simple_release_fs(&debugfs_mount, &debugfs_mount_count); goto exit; } - - if (dentry->d_inode) { - if (data) - dentry->d_inode->i_private = data; - if (fops) - dentry->d_inode->i_fop = fops; - } exit: return dentry; } diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index d5f8c96964be..8882ecc0f1bf 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -517,11 +517,23 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty) struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number) { + struct dentry *dentry; + struct tty_struct *tty; + BUG_ON(pts_inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR)); + /* Ensure dentry has not been deleted by devpts_pty_kill() */ + dentry = d_find_alias(pts_inode); + if (!dentry) + return NULL; + + tty = NULL; if (pts_inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) - return (struct tty_struct *)pts_inode->i_private; - return NULL; + tty = (struct tty_struct *)pts_inode->i_private; + + dput(dentry); + + return tty; } void devpts_pty_kill(struct tty_struct *tty) diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index fbb6e5eed697..7cb0a59f4b9d 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1748,7 +1748,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm, char *cipher_name, size_t *key_size) { char dummy_key[ECRYPTFS_MAX_KEY_BYTES]; - char *full_alg_name; + char *full_alg_name = NULL; int rc; *key_tfm = NULL; @@ -1763,7 +1763,6 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm, if (rc) goto out; *key_tfm = crypto_alloc_blkcipher(full_alg_name, 0, CRYPTO_ALG_ASYNC); - kfree(full_alg_name); if (IS_ERR(*key_tfm)) { rc = PTR_ERR(*key_tfm); printk(KERN_ERR "Unable to allocate crypto cipher with name " @@ -1786,6 +1785,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm, goto out; } out: + kfree(full_alg_name); return rc; } diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 9e944057001b..1744f17ce96e 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -191,13 +191,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file) | ECRYPTFS_ENCRYPTED); } mutex_unlock(&crypt_stat->cs_mutex); - if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY) - && !(file->f_flags & O_RDONLY)) { - rc = -EPERM; - printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs " - "file must hence be opened RO\n", __func__); - goto out; - } if (!ecryptfs_inode_to_private(inode)->lower_file) { rc = ecryptfs_init_persistent_file(ecryptfs_dentry); if (rc) { @@ -208,6 +201,13 @@ static int ecryptfs_open(struct inode *inode, struct file *file) goto out; } } + if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY) + && !(file->f_flags & O_RDONLY)) { + rc = -EPERM; + printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs " + "file must hence be opened RO\n", __func__); + goto out; + } ecryptfs_set_file_lower( file, ecryptfs_inode_to_private(inode)->lower_file); if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 056fed62d0de..728f07ebc593 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -971,6 +971,21 @@ out: return rc; } +int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct kstat lower_stat; + int rc; + + rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry), + ecryptfs_dentry_to_lower(dentry), &lower_stat); + if (!rc) { + generic_fillattr(dentry->d_inode, stat); + stat->blocks = lower_stat.blocks; + } + return rc; +} + int ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) @@ -1100,6 +1115,7 @@ const struct inode_operations ecryptfs_dir_iops = { const struct inode_operations ecryptfs_main_iops = { .permission = ecryptfs_permission, .setattr = ecryptfs_setattr, + .getattr = ecryptfs_getattr, .setxattr = ecryptfs_setxattr, .getxattr = ecryptfs_getxattr, .listxattr = ecryptfs_listxattr, diff --git a/fs/exec.c b/fs/exec.c index ba112bd4a339..da36c206f0f1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -572,6 +572,9 @@ int setup_arg_pages(struct linux_binprm *bprm, struct vm_area_struct *prev = NULL; unsigned long vm_flags; unsigned long stack_base; + unsigned long stack_size; + unsigned long stack_expand; + unsigned long rlim_stack; #ifdef CONFIG_STACK_GROWSUP /* Limit stack size to 1GB */ @@ -628,10 +631,24 @@ int setup_arg_pages(struct linux_binprm *bprm, goto out_unlock; } + stack_expand = EXTRA_STACK_VM_PAGES * PAGE_SIZE; + stack_size = vma->vm_end - vma->vm_start; + /* + * Align this down to a page boundary as expand_stack + * will align it up. + */ + rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK; + rlim_stack = min(rlim_stack, stack_size); #ifdef CONFIG_STACK_GROWSUP - stack_base = vma->vm_end + EXTRA_STACK_VM_PAGES * PAGE_SIZE; + if (stack_size + stack_expand > rlim_stack) + stack_base = vma->vm_start + rlim_stack; + else + stack_base = vma->vm_end + stack_expand; #else - stack_base = vma->vm_start - EXTRA_STACK_VM_PAGES * PAGE_SIZE; + if (stack_size + stack_expand > rlim_stack) + stack_base = vma->vm_end - rlim_stack; + else + stack_base = vma->vm_start - stack_expand; #endif ret = expand_stack(vma, stack_base); if (ret) @@ -931,9 +948,7 @@ void set_task_comm(struct task_struct *tsk, char *buf) int flush_old_exec(struct linux_binprm * bprm) { - char * name; - int i, ch, retval; - char tcomm[sizeof(current->comm)]; + int retval; /* * Make sure we have a private signal table and that @@ -954,6 +969,25 @@ int flush_old_exec(struct linux_binprm * bprm) bprm->mm = NULL; /* We're using it now */ + current->flags &= ~PF_RANDOMIZE; + flush_thread(); + current->personality &= ~bprm->per_clear; + + return 0; + +out: + return retval; +} +EXPORT_SYMBOL(flush_old_exec); + +void setup_new_exec(struct linux_binprm * bprm) +{ + int i, ch; + char * name; + char tcomm[sizeof(current->comm)]; + + arch_pick_mmap_layout(current->mm); + /* This is the point of no return */ current->sas_ss_sp = current->sas_ss_size = 0; @@ -975,9 +1009,6 @@ int flush_old_exec(struct linux_binprm * bprm) tcomm[i] = '\0'; set_task_comm(current, tcomm); - current->flags &= ~PF_RANDOMIZE; - flush_thread(); - /* Set the new mm task size. We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on * some architectures like powerpc @@ -993,8 +1024,6 @@ int flush_old_exec(struct linux_binprm * bprm) set_dumpable(current->mm, suid_dumpable); } - current->personality &= ~bprm->per_clear; - /* * Flush performance counters when crossing a * security domain: @@ -1009,14 +1038,8 @@ int flush_old_exec(struct linux_binprm * bprm) flush_signal_handlers(current, 0); flush_old_files(current->files); - - return 0; - -out: - return retval; } - -EXPORT_SYMBOL(flush_old_exec); +EXPORT_SYMBOL(setup_new_exec); /* * Prepare credentials and lock ->cred_guard_mutex. diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 6c10f7476699..6f7df0f7d282 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -731,13 +731,28 @@ static int exofs_write_begin_export(struct file *file, fsdata); } +static int exofs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = mapping->host; + /* According to comment in simple_write_end i_mutex is held */ + loff_t i_size = inode->i_size; + int ret; + + ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata); + if (i_size != inode->i_size) + mark_inode_dirty(inode); + return ret; +} + const struct address_space_operations exofs_aops = { .readpage = exofs_readpage, .readpages = exofs_readpages, .writepage = exofs_writepage, .writepages = exofs_writepages, .write_begin = exofs_write_begin_export, - .write_end = simple_write_end, + .write_end = exofs_write_end, }; /****************************************************************************** diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 354ed3b47b30..f9d69378caaa 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1151,6 +1151,16 @@ static int do_journal_get_write_access(handle_t *handle, return ext3_journal_get_write_access(handle, bh); } +/* + * Truncate blocks that were not used by write. We have to truncate the + * pagecache as well so that corresponding buffers get properly unmapped. + */ +static void ext3_truncate_failed_write(struct inode *inode) +{ + truncate_inode_pages(inode->i_mapping, inode->i_size); + ext3_truncate(inode); +} + static int ext3_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -1209,7 +1219,7 @@ write_begin_failed: unlock_page(page); page_cache_release(page); if (pos + len > inode->i_size) - ext3_truncate(inode); + ext3_truncate_failed_write(inode); } if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) goto retry; @@ -1304,7 +1314,7 @@ static int ext3_ordered_write_end(struct file *file, page_cache_release(page); if (pos + len > inode->i_size) - ext3_truncate(inode); + ext3_truncate_failed_write(inode); return ret ? ret : copied; } @@ -1330,7 +1340,7 @@ static int ext3_writeback_write_end(struct file *file, page_cache_release(page); if (pos + len > inode->i_size) - ext3_truncate(inode); + ext3_truncate_failed_write(inode); return ret ? ret : copied; } @@ -1383,7 +1393,7 @@ static int ext3_journalled_write_end(struct file *file, page_cache_release(page); if (pos + len > inode->i_size) - ext3_truncate(inode); + ext3_truncate_failed_write(inode); return ret ? ret : copied; } diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1d0418980f8d..f3032c919a22 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -761,7 +761,13 @@ static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, ext4_group_t group) { - return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0; + if (!ext4_bg_has_super(sb, group)) + return 0; + + if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG)) + return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); + else + return EXT4_SB(sb)->s_gdb_count; } /** diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 50784ef07563..dc79b75d8f70 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -160,7 +160,7 @@ int ext4_setup_system_zone(struct super_block *sb) if (ext4_bg_has_super(sb, i) && ((i < 5) || ((i % flex_size) == 0))) add_system_zone(sbi, ext4_group_first_block_no(sb, i), - sbi->s_gdb_count + 1); + ext4_bg_num_gdb(sb, i) + 1); gdp = ext4_get_group_desc(sb, i, NULL); ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1); if (ret) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8825515eeddd..d0a2afbab009 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -698,11 +698,22 @@ struct ext4_inode_info { __u16 i_extra_isize; spinlock_t i_block_reservation_lock; +#ifdef CONFIG_QUOTA + /* quota space reservation, managed internally by quota code */ + qsize_t i_reserved_quota; +#endif /* completed async DIOs that might need unwritten extents handling */ struct list_head i_aio_dio_complete_list; /* current io_end structure for async DIO write*/ ext4_io_end_t *cur_aio_dio; + + /* + * Transactions that contain inode's metadata needed to complete + * fsync and fdatasync, respectively. + */ + tid_t i_sync_tid; + tid_t i_datasync_tid; }; /* @@ -750,6 +761,7 @@ struct ext4_inode_info { #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ +#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt #define set_opt(o, opt) o |= EXT4_MOUNT_##opt @@ -1424,7 +1436,7 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); extern int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from); extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); -extern qsize_t ext4_get_reserved_space(struct inode *inode); +extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern int flush_aio_dio_completed_IO(struct inode *inode); /* ioctl.c */ extern long ext4_ioctl(struct file *, unsigned int, unsigned long); diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index a2865980342f..1892a7763426 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -49,7 +49,7 @@ #define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \ EXT4_XATTR_TRANS_BLOCKS - 2 + \ - 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) + EXT4_MAXQUOTAS_TRANS_BLOCKS(sb)) /* * Define the number of metadata blocks we need to account to modify data. @@ -57,7 +57,7 @@ * This include super block, inode block, quota blocks and xattr blocks */ #define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ - 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) + EXT4_MAXQUOTAS_TRANS_BLOCKS(sb)) /* Delete operations potentially hit one directory's namespace plus an * entire inode, plus arbitrary amounts of bitmap/indirection data. Be @@ -92,6 +92,7 @@ * but inode, sb and group updates are done only once */ #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) + #define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) #else @@ -99,6 +100,9 @@ #define EXT4_QUOTA_INIT_BLOCKS(sb) 0 #define EXT4_QUOTA_DEL_BLOCKS(sb) 0 #endif +#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb)) +#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) +#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) int ext4_mark_iloc_dirty(handle_t *handle, @@ -254,6 +258,19 @@ static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode) return 0; } +static inline void ext4_update_inode_fsync_trans(handle_t *handle, + struct inode *inode, + int datasync) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + + if (ext4_handle_valid(handle)) { + ei->i_sync_tid = handle->h_transaction->t_tid; + if (datasync) + ei->i_datasync_tid = handle->h_transaction->t_tid; + } +} + /* super.c */ int ext4_force_commit(struct super_block *sb); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 715264b4bae4..8b8bae4c0cf4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1761,7 +1761,9 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, while (block < last && block != EXT_MAX_BLOCK) { num = last - block; /* find extent for this block */ + down_read(&EXT4_I(inode)->i_data_sem); path = ext4_ext_find_extent(inode, block, path); + up_read(&EXT4_I(inode)->i_data_sem); if (IS_ERR(path)) { err = PTR_ERR(path); path = NULL; @@ -2074,7 +2076,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, ext_debug("free last %u blocks starting %llu\n", num, start); for (i = 0; i < num; i++) { bh = sb_find_get_block(inode->i_sb, start + i); - ext4_forget(handle, 0, inode, bh, start + i); + ext4_forget(handle, metadata, inode, bh, start + i); } ext4_free_blocks(handle, inode, start, num, metadata); } else if (from == le32_to_cpu(ex->ee_block) @@ -2167,7 +2169,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, correct_index = 1; credits += (ext_depth(inode)) + 1; } - credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); + credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); err = ext4_ext_truncate_extend_restart(handle, inode, credits); if (err) @@ -3064,6 +3066,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) { ret = ext4_convert_unwritten_extents_dio(handle, inode, path); + if (ret >= 0) + ext4_update_inode_fsync_trans(handle, inode, 1); goto out2; } /* buffered IO case */ @@ -3091,6 +3095,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, ret = ext4_ext_convert_to_initialized(handle, inode, path, iblock, max_blocks); + if (ret >= 0) + ext4_update_inode_fsync_trans(handle, inode, 1); out: if (ret <= 0) { err = ret; @@ -3329,10 +3335,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, allocated = ext4_ext_get_actual_len(&newex); set_buffer_new(bh_result); - /* Cache only when it is _not_ an uninitialized extent */ - if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) + /* + * Cache the extent and update transaction to commit on fdatasync only + * when it is _not_ an uninitialized extent. + */ + if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { ext4_ext_put_in_cache(inode, iblock, allocated, newblock, EXT4_EXT_CACHE_EXTENT); + ext4_update_inode_fsync_trans(handle, inode, 1); + } else + ext4_update_inode_fsync_trans(handle, inode, 0); out: if (allocated > max_blocks) allocated = max_blocks; @@ -3720,10 +3732,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, * Walk the extent tree gathering extent information. * ext4_ext_fiemap_cb will push extents back to user. */ - down_read(&EXT4_I(inode)->i_data_sem); error = ext4_ext_walk_space(inode, start_blk, len_blks, ext4_ext_fiemap_cb, fieinfo); - up_read(&EXT4_I(inode)->i_data_sem); } return error; diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 2b1531266ee2..d6049e40b161 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -51,25 +51,30 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; + struct ext4_inode_info *ei = EXT4_I(inode); journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; - int err, ret = 0; + int ret; + tid_t commit_tid; J_ASSERT(ext4_journal_current_handle() == NULL); trace_ext4_sync_file(file, dentry, datasync); + if (inode->i_sb->s_flags & MS_RDONLY) + return 0; + ret = flush_aio_dio_completed_IO(inode); if (ret < 0) - goto out; + return ret; + + if (!journal) + return simple_fsync(file, dentry, datasync); + /* - * data=writeback: + * data=writeback,ordered: * The caller's filemap_fdatawrite()/wait will sync the data. - * sync_inode() will sync the metadata - * - * data=ordered: - * The caller's filemap_fdatawrite() will write the data and - * sync_inode() will write the inode if it is dirty. Then the caller's - * filemap_fdatawait() will wait on the pages. + * Metadata is in the journal, we wait for proper transaction to + * commit here. * * data=journal: * filemap_fdatawrite won't do anything (the buffers are clean). @@ -79,32 +84,13 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) * (they were dirtied by commit). But that's OK - the blocks are * safe in-journal, which is all fsync() needs to ensure. */ - if (ext4_should_journal_data(inode)) { - ret = ext4_force_commit(inode->i_sb); - goto out; - } + if (ext4_should_journal_data(inode)) + return ext4_force_commit(inode->i_sb); - if (!journal) - ret = sync_mapping_buffers(inode->i_mapping); - - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - goto out; - - /* - * The VFS has written the file data. If the inode is unaltered - * then we need not start a commit. - */ - if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = 0, /* sys_fsync did this */ - }; - err = sync_inode(inode, &wbc); - if (ret == 0) - ret = err; - } -out: - if (journal && (journal->j_flags & JBD2_BARRIER)) + commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; + if (jbd2_log_start_commit(journal, commit_tid)) + jbd2_log_wait_commit(journal, commit_tid); + else if (journal->j_flags & JBD2_BARRIER) blkdev_issue_flush(inode->i_sb->s_bdev, NULL); return ret; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2c8caa51addb..e233879ebbcb 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1021,10 +1021,12 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, if (!err) err = ext4_splice_branch(handle, inode, iblock, partial, indirect_blks, count); - else + if (err) goto cleanup; set_buffer_new(bh_result); + + ext4_update_inode_fsync_trans(handle, inode, 1); got_it: map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); if (count > blocks_to_boundary) @@ -1043,17 +1045,12 @@ out: return err; } -qsize_t ext4_get_reserved_space(struct inode *inode) +#ifdef CONFIG_QUOTA +qsize_t *ext4_get_reserved_space(struct inode *inode) { - unsigned long long total; - - spin_lock(&EXT4_I(inode)->i_block_reservation_lock); - total = EXT4_I(inode)->i_reserved_data_blocks + - EXT4_I(inode)->i_reserved_meta_blocks; - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); - - return total; + return &EXT4_I(inode)->i_reserved_quota; } +#endif /* * Calculate the number of metadata blocks need to reserve * to allocate @blocks for non extent file based file @@ -1534,6 +1531,16 @@ static int do_journal_get_write_access(handle_t *handle, return ext4_journal_get_write_access(handle, bh); } +/* + * Truncate blocks that were not used by write. We have to truncate the + * pagecache as well so that corresponding buffers get properly unmapped. + */ +static void ext4_truncate_failed_write(struct inode *inode) +{ + truncate_inode_pages(inode->i_mapping, inode->i_size); + ext4_truncate(inode); +} + static int ext4_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -1599,7 +1606,7 @@ retry: ext4_journal_stop(handle); if (pos + len > inode->i_size) { - ext4_truncate(inode); + ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might * still be on the orphan list; we need to @@ -1709,7 +1716,7 @@ static int ext4_ordered_write_end(struct file *file, ret = ret2; if (pos + len > inode->i_size) { - ext4_truncate(inode); + ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might still be * on the orphan list; we need to make sure the inode @@ -1751,7 +1758,7 @@ static int ext4_writeback_write_end(struct file *file, ret = ret2; if (pos + len > inode->i_size) { - ext4_truncate(inode); + ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might still be * on the orphan list; we need to make sure the inode @@ -1814,7 +1821,7 @@ static int ext4_journalled_write_end(struct file *file, if (!ret) ret = ret2; if (pos + len > inode->i_size) { - ext4_truncate(inode); + ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might still be * on the orphan list; we need to make sure the inode @@ -1846,19 +1853,17 @@ repeat: md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; total = md_needed + nrblocks; + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); /* * Make quota reservation here to prevent quota overflow * later. Real quota accounting is done at pages writeout * time. */ - if (vfs_dq_reserve_block(inode, total)) { - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + if (vfs_dq_reserve_block(inode, total)) return -EDQUOT; - } if (ext4_claim_free_blocks(sbi, total)) { - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); vfs_dq_release_reservation_block(inode, total); if (ext4_should_retry_alloc(inode->i_sb, &retries)) { yield(); @@ -1866,10 +1871,11 @@ repeat: } return -ENOSPC; } + spin_lock(&EXT4_I(inode)->i_block_reservation_lock); EXT4_I(inode)->i_reserved_data_blocks += nrblocks; - EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; - + EXT4_I(inode)->i_reserved_meta_blocks += md_needed; spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + return 0; /* success */ } @@ -2788,7 +2794,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) * number of contiguous block. So we will limit * number of contiguous block to a sane value */ - if (!(inode->i_flags & EXT4_EXTENTS_FL) && + if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) && (max_blocks > EXT4_MAX_TRANS_DATA)) max_blocks = EXT4_MAX_TRANS_DATA; @@ -3091,7 +3097,7 @@ retry: * i_size_read because we hold i_mutex. */ if (pos + len > inode->i_size) - ext4_truncate(inode); + ext4_truncate_failed_write(inode); } if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) @@ -4120,6 +4126,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, __le32 *last) { __le32 *p; + int is_metadata = S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode); + if (try_to_extend_transaction(handle, inode)) { if (bh) { BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); @@ -4150,11 +4158,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, *p = 0; tbh = sb_find_get_block(inode->i_sb, nr); - ext4_forget(handle, 0, inode, tbh, nr); + ext4_forget(handle, is_metadata, inode, tbh, nr); } } - ext4_free_blocks(handle, inode, block_to_free, count, 0); + ext4_free_blocks(handle, inode, block_to_free, count, is_metadata); } /** @@ -4781,8 +4789,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) struct ext4_iloc iloc; struct ext4_inode *raw_inode; struct ext4_inode_info *ei; - struct buffer_head *bh; struct inode *inode; + journal_t *journal = EXT4_SB(sb)->s_journal; long ret; int block; @@ -4793,11 +4801,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) return inode; ei = EXT4_I(inode); + iloc.bh = 0; ret = __ext4_get_inode_loc(inode, &iloc, 0); if (ret < 0) goto bad_inode; - bh = iloc.bh; raw_inode = ext4_raw_inode(&iloc); inode->i_mode = le16_to_cpu(raw_inode->i_mode); inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); @@ -4820,7 +4828,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) if (inode->i_mode == 0 || !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { /* this inode is deleted */ - brelse(bh); ret = -ESTALE; goto bad_inode; } @@ -4837,6 +4844,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; inode->i_size = ext4_isize(raw_inode); ei->i_disksize = inode->i_size; +#ifdef CONFIG_QUOTA + ei->i_reserved_quota = 0; +#endif inode->i_generation = le32_to_cpu(raw_inode->i_generation); ei->i_block_group = iloc.block_group; ei->i_last_alloc_group = ~0; @@ -4848,11 +4858,35 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ei->i_data[block] = raw_inode->i_block[block]; INIT_LIST_HEAD(&ei->i_orphan); + /* + * Set transaction id's of transactions that have to be committed + * to finish f[data]sync. We set them to currently running transaction + * as we cannot be sure that the inode or some of its metadata isn't + * part of the transaction - the inode could have been reclaimed and + * now it is reread from disk. + */ + if (journal) { + transaction_t *transaction; + tid_t tid; + + spin_lock(&journal->j_state_lock); + if (journal->j_running_transaction) + transaction = journal->j_running_transaction; + else + transaction = journal->j_committing_transaction; + if (transaction) + tid = transaction->t_tid; + else + tid = journal->j_commit_sequence; + spin_unlock(&journal->j_state_lock); + ei->i_sync_tid = tid; + ei->i_datasync_tid = tid; + } + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > EXT4_INODE_SIZE(inode->i_sb)) { - brelse(bh); ret = -EIO; goto bad_inode; } @@ -4884,10 +4918,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ret = 0; if (ei->i_file_acl && - ((ei->i_file_acl < - (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + - EXT4_SB(sb)->s_gdb_count)) || - (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) { + !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { ext4_error(sb, __func__, "bad extended attribute block %llu in inode #%lu", ei->i_file_acl, inode->i_ino); @@ -4905,10 +4936,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) /* Validate block references which are part of inode */ ret = ext4_check_inode_blockref(inode); } - if (ret) { - brelse(bh); + if (ret) goto bad_inode; - } if (S_ISREG(inode->i_mode)) { inode->i_op = &ext4_file_inode_operations; @@ -4936,7 +4965,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) init_special_inode(inode, inode->i_mode, new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); } else { - brelse(bh); ret = -EIO; ext4_error(inode->i_sb, __func__, "bogus i_mode (%o) for inode=%lu", @@ -4949,6 +4977,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) return inode; bad_inode: + brelse(iloc.bh); iget_failed(inode); return ERR_PTR(ret); } @@ -5108,6 +5137,7 @@ static int ext4_do_update_inode(handle_t *handle, err = rc; ei->i_state &= ~EXT4_STATE_NEW; + ext4_update_inode_fsync_trans(handle, inode, 0); out_brelse: brelse(bh); ext4_std_error(inode->i_sb, err); @@ -5227,8 +5257,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) /* (user+group)*(old+new) structure, inode write (sb, * inode block, ? - but truncate inode update has it) */ - handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+ - EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3); + handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+ + EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3); if (IS_ERR(handle)) { error = PTR_ERR(handle); goto err_out; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index c1cdf613e725..b63d193126db 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -221,31 +221,38 @@ setversion_out: struct file *donor_filp; int err; + if (!(filp->f_mode & FMODE_READ) || + !(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (copy_from_user(&me, (struct move_extent __user *)arg, sizeof(me))) return -EFAULT; + me.moved_len = 0; donor_filp = fget(me.donor_fd); if (!donor_filp) return -EBADF; - if (!capable(CAP_DAC_OVERRIDE)) { - if ((current->real_cred->fsuid != inode->i_uid) || - !(inode->i_mode & S_IRUSR) || - !(donor_filp->f_dentry->d_inode->i_mode & - S_IRUSR)) { - fput(donor_filp); - return -EACCES; - } + if (!(donor_filp->f_mode & FMODE_WRITE)) { + err = -EBADF; + goto mext_out; } + err = mnt_want_write(filp->f_path.mnt); + if (err) + goto mext_out; + err = ext4_move_extents(filp, donor_filp, me.orig_start, me.donor_start, me.len, &me.moved_len); - fput(donor_filp); + mnt_drop_write(filp->f_path.mnt); + if (me.moved_len > 0) + file_remove_suid(donor_filp); if (copy_to_user((struct move_extent *)arg, &me, sizeof(me))) - return -EFAULT; - + err = -EFAULT; +mext_out: + fput(donor_filp); return err; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index bba12824defa..7d7114818f8d 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2529,7 +2529,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) struct ext4_group_info *db; int err, count = 0, count2 = 0; struct ext4_free_data *entry; - ext4_fsblk_t discard_block; struct list_head *l, *ltmp; list_for_each_safe(l, ltmp, &txn->t_private_list) { @@ -2559,13 +2558,19 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) page_cache_release(e4b.bd_bitmap_page); } ext4_unlock_group(sb, entry->group); - discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) - + entry->start_blk - + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); - trace_ext4_discard_blocks(sb, (unsigned long long)discard_block, - entry->count); - sb_issue_discard(sb, discard_block, entry->count); - + if (test_opt(sb, DISCARD)) { + ext4_fsblk_t discard_block; + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + + discard_block = (ext4_fsblk_t)entry->group * + EXT4_BLOCKS_PER_GROUP(sb) + + entry->start_blk + + le32_to_cpu(es->s_first_data_block); + trace_ext4_discard_blocks(sb, + (unsigned long long)discard_block, + entry->count); + sb_issue_discard(sb, discard_block, entry->count); + } kmem_cache_free(ext4_free_ext_cachep, entry); ext4_mb_release_desc(&e4b); } @@ -3006,6 +3011,24 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac) } /* + * Called on failure; free up any blocks from the inode PA for this + * context. We don't need this for MB_GROUP_PA because we only change + * pa_free in ext4_mb_release_context(), but on failure, we've already + * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed. + */ +static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) +{ + struct ext4_prealloc_space *pa = ac->ac_pa; + int len; + + if (pa && pa->pa_type == MB_INODE_PA) { + len = ac->ac_b_ex.fe_len; + pa->pa_free += len; + } + +} + +/* * use blocks preallocated to inode */ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, @@ -4290,6 +4313,7 @@ repeat: ac->ac_status = AC_STATUS_CONTINUE; goto repeat; } else if (*errp) { + ext4_discard_allocated_blocks(ac); ac->ac_b_ex.fe_len = 0; ar->len = 0; ext4_mb_show_ac(ac); diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index a93d5b80f3e2..864614974536 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -238,7 +238,7 @@ static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode) * So allocate a credit of 3. We may update * quota (user and group). */ - needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); + needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); if (ext4_journal_extend(handle, needed) != 0) retval = ext4_journal_restart(handle, needed); @@ -477,7 +477,7 @@ int ext4_ext_migrate(struct inode *inode) handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb) + EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) + 1); if (IS_ERR(handle)) { retval = PTR_ERR(handle); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 25b6b1457360..f5b03a132a2a 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -77,12 +77,14 @@ static int mext_next_extent(struct inode *inode, struct ext4_ext_path *path, struct ext4_extent **extent) { + struct ext4_extent_header *eh; int ppos, leaf_ppos = path->p_depth; ppos = leaf_ppos; if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { /* leaf block */ *extent = ++path[ppos].p_ext; + path[ppos].p_block = ext_pblock(path[ppos].p_ext); return 0; } @@ -119,9 +121,18 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, ext_block_hdr(path[cur_ppos+1].p_bh); } + path[leaf_ppos].p_ext = *extent = NULL; + + eh = path[leaf_ppos].p_hdr; + if (le16_to_cpu(eh->eh_entries) == 0) + /* empty leaf is found */ + return -ENODATA; + /* leaf block */ path[leaf_ppos].p_ext = *extent = EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); + path[leaf_ppos].p_block = + ext_pblock(path[leaf_ppos].p_ext); return 0; } } @@ -155,40 +166,15 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2, } /** - * mext_double_down_read - Acquire two inodes' read semaphore - * - * @orig_inode: original inode structure - * @donor_inode: donor inode structure - * Acquire read semaphore of the two inodes (orig and donor) by i_ino order. - */ -static void -mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode) -{ - struct inode *first = orig_inode, *second = donor_inode; - - /* - * Use the inode number to provide the stable locking order instead - * of its address, because the C language doesn't guarantee you can - * compare pointers that don't come from the same array. - */ - if (donor_inode->i_ino < orig_inode->i_ino) { - first = donor_inode; - second = orig_inode; - } - - down_read(&EXT4_I(first)->i_data_sem); - down_read(&EXT4_I(second)->i_data_sem); -} - -/** - * mext_double_down_write - Acquire two inodes' write semaphore + * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem * * @orig_inode: original inode structure * @donor_inode: donor inode structure - * Acquire write semaphore of the two inodes (orig and donor) by i_ino order. + * Acquire write lock of i_data_sem of the two inodes (orig and donor) by + * i_ino order. */ static void -mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) +double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) { struct inode *first = orig_inode, *second = donor_inode; @@ -203,32 +189,18 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) } down_write(&EXT4_I(first)->i_data_sem); - down_write(&EXT4_I(second)->i_data_sem); + down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); } /** - * mext_double_up_read - Release two inodes' read semaphore + * double_up_write_data_sem - Release two inodes' write lock of i_data_sem * * @orig_inode: original inode structure to be released its lock first * @donor_inode: donor inode structure to be released its lock second - * Release read semaphore of two inodes (orig and donor). + * Release write lock of i_data_sem of two inodes (orig and donor). */ static void -mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) -{ - up_read(&EXT4_I(orig_inode)->i_data_sem); - up_read(&EXT4_I(donor_inode)->i_data_sem); -} - -/** - * mext_double_up_write - Release two inodes' write semaphore - * - * @orig_inode: original inode structure to be released its lock first - * @donor_inode: donor inode structure to be released its lock second - * Release write semaphore of two inodes (orig and donor). - */ -static void -mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode) +double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) { up_write(&EXT4_I(orig_inode)->i_data_sem); up_write(&EXT4_I(donor_inode)->i_data_sem); @@ -661,6 +633,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, * @donor_inode: donor inode * @from: block offset of orig_inode * @count: block count to be replaced + * @err: pointer to save return value * * Replace original inode extents and donor inode extents page by page. * We implement this replacement in the following three steps: @@ -671,33 +644,33 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, * 3. Change the block information of donor inode to point at the saved * original inode blocks in the dummy extents. * - * Return 0 on success, or a negative error value on failure. + * Return replaced block count. */ static int mext_replace_branches(handle_t *handle, struct inode *orig_inode, struct inode *donor_inode, ext4_lblk_t from, - ext4_lblk_t count) + ext4_lblk_t count, int *err) { struct ext4_ext_path *orig_path = NULL; struct ext4_ext_path *donor_path = NULL; struct ext4_extent *oext, *dext; struct ext4_extent tmp_dext, tmp_oext; ext4_lblk_t orig_off = from, donor_off = from; - int err = 0; int depth; int replaced_count = 0; int dext_alen; - mext_double_down_write(orig_inode, donor_inode); + /* Protect extent trees against block allocations via delalloc */ + double_down_write_data_sem(orig_inode, donor_inode); /* Get the original extent for the block "orig_off" */ - err = get_ext_path(orig_inode, orig_off, &orig_path); - if (err) + *err = get_ext_path(orig_inode, orig_off, &orig_path); + if (*err) goto out; /* Get the donor extent for the head */ - err = get_ext_path(donor_inode, donor_off, &donor_path); - if (err) + *err = get_ext_path(donor_inode, donor_off, &donor_path); + if (*err) goto out; depth = ext_depth(orig_inode); oext = orig_path[depth].p_ext; @@ -707,9 +680,9 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, dext = donor_path[depth].p_ext; tmp_dext = *dext; - err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, + *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, donor_off, count); - if (err) + if (*err) goto out; /* Loop for the donor extents */ @@ -718,7 +691,7 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, if (!dext) { ext4_error(donor_inode->i_sb, __func__, "The extent for donor must be found"); - err = -EIO; + *err = -EIO; goto out; } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { ext4_error(donor_inode->i_sb, __func__, @@ -726,20 +699,20 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, "extent(%u) should be equal", donor_off, le32_to_cpu(tmp_dext.ee_block)); - err = -EIO; + *err = -EIO; goto out; } /* Set donor extent to orig extent */ - err = mext_leaf_block(handle, orig_inode, + *err = mext_leaf_block(handle, orig_inode, orig_path, &tmp_dext, &orig_off); - if (err < 0) + if (*err) goto out; /* Set orig extent to donor extent */ - err = mext_leaf_block(handle, donor_inode, + *err = mext_leaf_block(handle, donor_inode, donor_path, &tmp_oext, &donor_off); - if (err < 0) + if (*err) goto out; dext_alen = ext4_ext_get_actual_len(&tmp_dext); @@ -753,35 +726,25 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, if (orig_path) ext4_ext_drop_refs(orig_path); - err = get_ext_path(orig_inode, orig_off, &orig_path); - if (err) + *err = get_ext_path(orig_inode, orig_off, &orig_path); + if (*err) goto out; depth = ext_depth(orig_inode); oext = orig_path[depth].p_ext; - if (le32_to_cpu(oext->ee_block) + - ext4_ext_get_actual_len(oext) <= orig_off) { - err = 0; - goto out; - } tmp_oext = *oext; if (donor_path) ext4_ext_drop_refs(donor_path); - err = get_ext_path(donor_inode, donor_off, &donor_path); - if (err) + *err = get_ext_path(donor_inode, donor_off, &donor_path); + if (*err) goto out; depth = ext_depth(donor_inode); dext = donor_path[depth].p_ext; - if (le32_to_cpu(dext->ee_block) + - ext4_ext_get_actual_len(dext) <= donor_off) { - err = 0; - goto out; - } tmp_dext = *dext; - err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, + *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, donor_off, count - replaced_count); - if (err) + if (*err) goto out; } @@ -795,8 +758,12 @@ out: kfree(donor_path); } - mext_double_up_write(orig_inode, donor_inode); - return err; + ext4_ext_invalidate_cache(orig_inode); + ext4_ext_invalidate_cache(donor_inode); + + double_up_write_data_sem(orig_inode, donor_inode); + + return replaced_count; } /** @@ -808,16 +775,17 @@ out: * @data_offset_in_page: block index where data swapping starts * @block_len_in_page: the number of blocks to be swapped * @uninit: orig extent is uninitialized or not + * @err: pointer to save return value * * Save the data in original inode blocks and replace original inode extents * with donor inode extents by calling mext_replace_branches(). - * Finally, write out the saved data in new original inode blocks. Return 0 - * on success, or a negative error value on failure. + * Finally, write out the saved data in new original inode blocks. Return + * replaced block count. */ static int move_extent_per_page(struct file *o_filp, struct inode *donor_inode, pgoff_t orig_page_offset, int data_offset_in_page, - int block_len_in_page, int uninit) + int block_len_in_page, int uninit, int *err) { struct inode *orig_inode = o_filp->f_dentry->d_inode; struct address_space *mapping = orig_inode->i_mapping; @@ -829,9 +797,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, long long offs = orig_page_offset << PAGE_CACHE_SHIFT; unsigned long blocksize = orig_inode->i_sb->s_blocksize; unsigned int w_flags = 0; - unsigned int tmp_data_len, data_len; + unsigned int tmp_data_size, data_size, replaced_size; void *fsdata; - int ret, i, jblocks; + int i, jblocks; + int err2 = 0; + int replaced_count = 0; int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; /* @@ -841,8 +811,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; handle = ext4_journal_start(orig_inode, jblocks); if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - return ret; + *err = PTR_ERR(handle); + return 0; } if (segment_eq(get_fs(), KERNEL_DS)) @@ -858,39 +828,36 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, * Just swap data blocks between orig and donor. */ if (uninit) { - ret = mext_replace_branches(handle, orig_inode, - donor_inode, orig_blk_offset, - block_len_in_page); - - /* Clear the inode cache not to refer to the old data */ - ext4_ext_invalidate_cache(orig_inode); - ext4_ext_invalidate_cache(donor_inode); + replaced_count = mext_replace_branches(handle, orig_inode, + donor_inode, orig_blk_offset, + block_len_in_page, err); goto out2; } offs = (long long)orig_blk_offset << orig_inode->i_blkbits; - /* Calculate data_len */ + /* Calculate data_size */ if ((orig_blk_offset + block_len_in_page - 1) == ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { /* Replace the last block */ - tmp_data_len = orig_inode->i_size & (blocksize - 1); + tmp_data_size = orig_inode->i_size & (blocksize - 1); /* - * If data_len equal zero, it shows data_len is multiples of + * If data_size equal zero, it shows data_size is multiples of * blocksize. So we set appropriate value. */ - if (tmp_data_len == 0) - tmp_data_len = blocksize; + if (tmp_data_size == 0) + tmp_data_size = blocksize; - data_len = tmp_data_len + + data_size = tmp_data_size + ((block_len_in_page - 1) << orig_inode->i_blkbits); - } else { - data_len = block_len_in_page << orig_inode->i_blkbits; - } + } else + data_size = block_len_in_page << orig_inode->i_blkbits; + + replaced_size = data_size; - ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags, + *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags, &page, &fsdata); - if (unlikely(ret < 0)) + if (unlikely(*err < 0)) goto out; if (!PageUptodate(page)) { @@ -911,14 +878,17 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, /* Release old bh and drop refs */ try_to_release_page(page, 0); - ret = mext_replace_branches(handle, orig_inode, donor_inode, - orig_blk_offset, block_len_in_page); - if (ret < 0) - goto out; - - /* Clear the inode cache not to refer to the old data */ - ext4_ext_invalidate_cache(orig_inode); - ext4_ext_invalidate_cache(donor_inode); + replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, + orig_blk_offset, block_len_in_page, + &err2); + if (err2) { + if (replaced_count) { + block_len_in_page = replaced_count; + replaced_size = + block_len_in_page << orig_inode->i_blkbits; + } else + goto out; + } if (!page_has_buffers(page)) create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); @@ -928,16 +898,16 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, bh = bh->b_this_page; for (i = 0; i < block_len_in_page; i++) { - ret = ext4_get_block(orig_inode, + *err = ext4_get_block(orig_inode, (sector_t)(orig_blk_offset + i), bh, 0); - if (ret < 0) + if (*err < 0) goto out; if (bh->b_this_page != NULL) bh = bh->b_this_page; } - ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len, + *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size, page, fsdata); page = NULL; @@ -951,7 +921,10 @@ out: out2: ext4_journal_stop(handle); - return ret < 0 ? ret : 0; + if (err2) + *err = err2; + + return replaced_count; } /** @@ -962,7 +935,6 @@ out2: * @orig_start: logical start offset in block for orig * @donor_start: logical start offset in block for donor * @len: the number of blocks to be moved - * @moved_len: moved block length * * Check the arguments of ext4_move_extents() whether the files can be * exchanged with each other. @@ -970,8 +942,8 @@ out2: */ static int mext_check_arguments(struct inode *orig_inode, - struct inode *donor_inode, __u64 orig_start, - __u64 donor_start, __u64 *len, __u64 moved_len) + struct inode *donor_inode, __u64 orig_start, + __u64 donor_start, __u64 *len) { ext4_lblk_t orig_blocks, donor_blocks; unsigned int blkbits = orig_inode->i_blkbits; @@ -985,6 +957,13 @@ mext_check_arguments(struct inode *orig_inode, return -EINVAL; } + if (donor_inode->i_mode & (S_ISUID|S_ISGID)) { + ext4_debug("ext4 move extent: suid or sgid is set" + " to donor file [ino:orig %lu, donor %lu]\n", + orig_inode->i_ino, donor_inode->i_ino); + return -EINVAL; + } + /* Ext4 move extent does not support swapfile */ if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) { ext4_debug("ext4 move extent: The argument files should " @@ -1025,13 +1004,6 @@ mext_check_arguments(struct inode *orig_inode, return -EINVAL; } - if (moved_len) { - ext4_debug("ext4 move extent: moved_len should be 0 " - "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, - donor_inode->i_ino); - return -EINVAL; - } - if ((orig_start > EXT_MAX_BLOCK) || (donor_start > EXT_MAX_BLOCK) || (*len > EXT_MAX_BLOCK) || @@ -1232,16 +1204,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, return -EINVAL; } - /* protect orig and donor against a truncate */ + /* Protect orig and donor inodes against a truncate */ ret1 = mext_inode_double_lock(orig_inode, donor_inode); if (ret1 < 0) return ret1; - mext_double_down_read(orig_inode, donor_inode); + /* Protect extent tree against block allocations via delalloc */ + double_down_write_data_sem(orig_inode, donor_inode); /* Check the filesystem environment whether move_extent can be done */ ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, - donor_start, &len, *moved_len); - mext_double_up_read(orig_inode, donor_inode); + donor_start, &len); if (ret1) goto out; @@ -1355,36 +1327,39 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, seq_start = le32_to_cpu(ext_cur->ee_block); rest_blocks = seq_blocks; - /* Discard preallocations of two inodes */ - down_write(&EXT4_I(orig_inode)->i_data_sem); - ext4_discard_preallocations(orig_inode); - up_write(&EXT4_I(orig_inode)->i_data_sem); - - down_write(&EXT4_I(donor_inode)->i_data_sem); - ext4_discard_preallocations(donor_inode); - up_write(&EXT4_I(donor_inode)->i_data_sem); + /* + * Up semaphore to avoid following problems: + * a. transaction deadlock among ext4_journal_start, + * ->write_begin via pagefault, and jbd2_journal_commit + * b. racing with ->readpage, ->write_begin, and ext4_get_block + * in move_extent_per_page + */ + double_up_write_data_sem(orig_inode, donor_inode); while (orig_page_offset <= seq_end_page) { /* Swap original branches with new branches */ - ret1 = move_extent_per_page(o_filp, donor_inode, + block_len_in_page = move_extent_per_page( + o_filp, donor_inode, orig_page_offset, data_offset_in_page, - block_len_in_page, uninit); - if (ret1 < 0) - goto out; - orig_page_offset++; + block_len_in_page, uninit, + &ret1); + /* Count how many blocks we have exchanged */ *moved_len += block_len_in_page; + if (ret1 < 0) + break; if (*moved_len > len) { ext4_error(orig_inode->i_sb, __func__, "We replaced blocks too much! " "sum of replaced: %llu requested: %llu", *moved_len, len); ret1 = -EIO; - goto out; + break; } + orig_page_offset++; data_offset_in_page = 0; rest_blocks -= block_len_in_page; if (rest_blocks > blocks_per_page) @@ -1393,6 +1368,10 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, block_len_in_page = rest_blocks; } + double_down_write_data_sem(orig_inode, donor_inode); + if (ret1 < 0) + break; + /* Decrease buffer counter */ if (holecheck_path) ext4_ext_drop_refs(holecheck_path); @@ -1414,6 +1393,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, } out: + if (*moved_len) { + ext4_discard_preallocations(orig_inode); + ext4_discard_preallocations(donor_inode); + } + if (orig_path) { ext4_ext_drop_refs(orig_path); kfree(orig_path); @@ -1422,7 +1406,7 @@ out: ext4_ext_drop_refs(holecheck_path); kfree(holecheck_path); } - + double_up_write_data_sem(orig_inode, donor_inode); ret2 = mext_inode_double_unlock(orig_inode, donor_inode); if (ret1) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 6d2c1b897fc7..17a17e10dd60 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1292,9 +1292,6 @@ errout: * add_dirent_to_buf will attempt search the directory block for * space. It will return -ENOSPC if no space is available, and -EIO * and -EEXIST if directory entry already exists. - * - * NOTE! bh is NOT released in the case where ENOSPC is returned. In - * all other cases bh is released. */ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, struct inode *inode, struct ext4_dir_entry_2 *de, @@ -1315,14 +1312,10 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, top = bh->b_data + blocksize - reclen; while ((char *) de <= top) { if (!ext4_check_dir_entry("ext4_add_entry", dir, de, - bh, offset)) { - brelse(bh); + bh, offset)) return -EIO; - } - if (ext4_match(namelen, name, de)) { - brelse(bh); + if (ext4_match(namelen, name, de)) return -EEXIST; - } nlen = EXT4_DIR_REC_LEN(de->name_len); rlen = ext4_rec_len_from_disk(de->rec_len, blocksize); if ((de->inode? rlen - nlen: rlen) >= reclen) @@ -1337,7 +1330,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, err = ext4_journal_get_write_access(handle, bh); if (err) { ext4_std_error(dir->i_sb, err); - brelse(bh); return err; } @@ -1377,7 +1369,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, err = ext4_handle_dirty_metadata(handle, dir, bh); if (err) ext4_std_error(dir->i_sb, err); - brelse(bh); return 0; } @@ -1471,7 +1462,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, if (!(de)) return retval; - return add_dirent_to_buf(handle, dentry, inode, de, bh); + retval = add_dirent_to_buf(handle, dentry, inode, de, bh); + brelse(bh); + return retval; } /* @@ -1514,8 +1507,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, if(!bh) return retval; retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); - if (retval != -ENOSPC) + if (retval != -ENOSPC) { + brelse(bh); return retval; + } if (blocks == 1 && !dx_fallback && EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) @@ -1528,7 +1523,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, de = (struct ext4_dir_entry_2 *) bh->b_data; de->inode = 0; de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); - return add_dirent_to_buf(handle, dentry, inode, de, bh); + retval = add_dirent_to_buf(handle, dentry, inode, de, bh); + brelse(bh); + return retval; } /* @@ -1561,10 +1558,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, goto journal_error; err = add_dirent_to_buf(handle, dentry, inode, NULL, bh); - if (err != -ENOSPC) { - bh = NULL; + if (err != -ENOSPC) goto cleanup; - } /* Block full, should compress but for now just split */ dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", @@ -1657,7 +1652,6 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, if (!de) goto cleanup; err = add_dirent_to_buf(handle, dentry, inode, de, bh); - bh = NULL; goto cleanup; journal_error: @@ -1775,7 +1769,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1809,7 +1803,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry, retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1846,7 +1840,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2259,7 +2253,7 @@ static int ext4_symlink(struct inode *dir, retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 + - 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3cfc343c41b5..3b2c5541d8a6 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -247,7 +247,7 @@ static int setup_new_group_blocks(struct super_block *sb, goto exit_bh; if (IS_ERR(gdb = bclean(handle, sb, block))) { - err = PTR_ERR(bh); + err = PTR_ERR(gdb); goto exit_bh; } ext4_handle_dirty_metadata(handle, NULL, gdb); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d4ca92aab514..92943f2ca2ab 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -603,10 +603,6 @@ static void ext4_put_super(struct super_block *sb) if (sb->s_dirt) ext4_commit_super(sb, 1); - ext4_release_system_zone(sb); - ext4_mb_release(sb); - ext4_ext_release(sb); - ext4_xattr_put_super(sb); if (sbi->s_journal) { err = jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; @@ -614,6 +610,12 @@ static void ext4_put_super(struct super_block *sb) ext4_abort(sb, __func__, "Couldn't clean up the journal"); } + + ext4_release_system_zone(sb); + ext4_mb_release(sb); + ext4_ext_release(sb); + ext4_xattr_put_super(sb); + if (!(sb->s_flags & MS_RDONLY)) { EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); es->s_state = cpu_to_le16(sbi->s_mount_state); @@ -702,8 +704,13 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_allocated_meta_blocks = 0; ei->i_delalloc_reserved_flag = 0; spin_lock_init(&(ei->i_block_reservation_lock)); +#ifdef CONFIG_QUOTA + ei->i_reserved_quota = 0; +#endif INIT_LIST_HEAD(&ei->i_aio_dio_complete_list); ei->cur_aio_dio = NULL; + ei->i_sync_tid = 0; + ei->i_datasync_tid = 0; return &ei->vfs_inode; } @@ -899,6 +906,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) if (test_opt(sb, NO_AUTO_DA_ALLOC)) seq_puts(seq, ",noauto_da_alloc"); + if (test_opt(sb, DISCARD)) + seq_puts(seq, ",discard"); + + if (test_opt(sb, NOLOAD)) + seq_puts(seq, ",norecovery"); + ext4_show_quota_options(seq, sb); return 0; @@ -991,7 +1004,9 @@ static const struct dquot_operations ext4_quota_operations = { .reserve_space = dquot_reserve_space, .claim_space = dquot_claim_space, .release_rsv = dquot_release_reserved_space, +#ifdef CONFIG_QUOTA .get_reserved_space = ext4_get_reserved_space, +#endif .alloc_inode = dquot_alloc_inode, .free_space = dquot_free_space, .free_inode = dquot_free_inode, @@ -1079,7 +1094,8 @@ enum { Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_block_validity, Opt_noblock_validity, - Opt_inode_readahead_blks, Opt_journal_ioprio + Opt_inode_readahead_blks, Opt_journal_ioprio, + Opt_discard, Opt_nodiscard, }; static const match_table_t tokens = { @@ -1104,6 +1120,7 @@ static const match_table_t tokens = { {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, {Opt_noload, "noload"}, + {Opt_noload, "norecovery"}, {Opt_nobh, "nobh"}, {Opt_bh, "bh"}, {Opt_commit, "commit=%u"}, @@ -1144,6 +1161,8 @@ static const match_table_t tokens = { {Opt_auto_da_alloc, "auto_da_alloc=%u"}, {Opt_auto_da_alloc, "auto_da_alloc"}, {Opt_noauto_da_alloc, "noauto_da_alloc"}, + {Opt_discard, "discard"}, + {Opt_nodiscard, "nodiscard"}, {Opt_err, NULL}, }; @@ -1565,6 +1584,12 @@ set_qf_format: else set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); break; + case Opt_discard: + set_opt(sbi->s_mount_opt, DISCARD); + break; + case Opt_nodiscard: + clear_opt(sbi->s_mount_opt, DISCARD); + break; default: ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " @@ -1673,14 +1698,14 @@ static int ext4_fill_flex_info(struct super_block *sb) size_t size; int i; - if (!sbi->s_es->s_log_groups_per_flex) { + sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; + groups_per_flex = 1 << sbi->s_log_groups_per_flex; + + if (groups_per_flex < 2) { sbi->s_log_groups_per_flex = 0; return 1; } - sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; - groups_per_flex = 1 << sbi->s_log_groups_per_flex; - /* We allocate both existing and potentially added groups */ flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << @@ -3668,13 +3693,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); - ext4_free_blocks_count_set(es, buf->f_bfree); buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); if (buf->f_bfree < ext4_r_blocks_count(es)) buf->f_bavail = 0; buf->f_files = le32_to_cpu(es->s_inodes_count); buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); - es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); buf->f_namelen = EXT4_NAME_LEN; fsid = le64_to_cpup((void *)es->s_uuid) ^ le64_to_cpup((void *)es->s_uuid + sizeof(u64)); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index fed5b01d7a8d..025701926f9a 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -988,6 +988,10 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, if (error) goto cleanup; + error = ext4_journal_get_write_access(handle, is.iloc.bh); + if (error) + goto cleanup; + if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) { struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc); memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); @@ -1013,9 +1017,6 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, if (flags & XATTR_CREATE) goto cleanup; } - error = ext4_journal_get_write_access(handle, is.iloc.bh); - if (error) - goto cleanup; if (!value) { if (!is.s.not_found) error = ext4_xattr_ibody_set(handle, inode, &i, &is); diff --git a/fs/fcntl.c b/fs/fcntl.c index 2cf93ec40a67..97e01dc0d95f 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -618,60 +618,90 @@ static DEFINE_RWLOCK(fasync_lock); static struct kmem_cache *fasync_cache __read_mostly; /* - * fasync_helper() is used by almost all character device drivers - * to set up the fasync queue. It returns negative on error, 0 if it did - * no changes and positive if it added/deleted the entry. + * Remove a fasync entry. If successfully removed, return + * positive and clear the FASYNC flag. If no entry exists, + * do nothing and return 0. + * + * NOTE! It is very important that the FASYNC flag always + * match the state "is the filp on a fasync list". + * + * We always take the 'filp->f_lock', in since fasync_lock + * needs to be irq-safe. */ -int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp) +static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) { struct fasync_struct *fa, **fp; - struct fasync_struct *new = NULL; int result = 0; - if (on) { - new = kmem_cache_alloc(fasync_cache, GFP_KERNEL); - if (!new) - return -ENOMEM; + spin_lock(&filp->f_lock); + write_lock_irq(&fasync_lock); + for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { + if (fa->fa_file != filp) + continue; + *fp = fa->fa_next; + kmem_cache_free(fasync_cache, fa); + filp->f_flags &= ~FASYNC; + result = 1; + break; } + write_unlock_irq(&fasync_lock); + spin_unlock(&filp->f_lock); + return result; +} + +/* + * Add a fasync entry. Return negative on error, positive if + * added, and zero if did nothing but change an existing one. + * + * NOTE! It is very important that the FASYNC flag always + * match the state "is the filp on a fasync list". + */ +static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp) +{ + struct fasync_struct *new, *fa, **fp; + int result = 0; + + new = kmem_cache_alloc(fasync_cache, GFP_KERNEL); + if (!new) + return -ENOMEM; - /* - * We need to take f_lock first since it's not an IRQ-safe - * lock. - */ spin_lock(&filp->f_lock); write_lock_irq(&fasync_lock); for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { - if (fa->fa_file == filp) { - if(on) { - fa->fa_fd = fd; - kmem_cache_free(fasync_cache, new); - } else { - *fp = fa->fa_next; - kmem_cache_free(fasync_cache, fa); - result = 1; - } - goto out; - } + if (fa->fa_file != filp) + continue; + fa->fa_fd = fd; + kmem_cache_free(fasync_cache, new); + goto out; } - if (on) { - new->magic = FASYNC_MAGIC; - new->fa_file = filp; - new->fa_fd = fd; - new->fa_next = *fapp; - *fapp = new; - result = 1; - } + new->magic = FASYNC_MAGIC; + new->fa_file = filp; + new->fa_fd = fd; + new->fa_next = *fapp; + *fapp = new; + result = 1; + filp->f_flags |= FASYNC; + out: - if (on) - filp->f_flags |= FASYNC; - else - filp->f_flags &= ~FASYNC; write_unlock_irq(&fasync_lock); spin_unlock(&filp->f_lock); return result; } +/* + * fasync_helper() is used by almost all character device drivers + * to set up the fasync queue, and for regular files by the file + * lease code. It returns negative on error, 0 if it did no changes + * and positive if it added/deleted the entry. + */ +int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp) +{ + if (!on) + return fasync_remove_entry(filp, fapp); + return fasync_add_entry(fd, filp, fapp); +} + EXPORT_SYMBOL(fasync_helper); void __kill_fasync(struct fasync_struct *fa, int sig, int band) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c18913a777ae..a9f5e137f1d3 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -828,6 +828,9 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, if (!page) break; + if (mapping_writably_mapped(mapping)) + flush_dcache_page(page); + pagefault_disable(); tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes); pagefault_enable(); diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c index 6d98f116ca03..424b0337f524 100644 --- a/fs/hfs/catalog.c +++ b/fs/hfs/catalog.c @@ -289,6 +289,10 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name, err = hfs_brec_find(&src_fd); if (err) goto out; + if (src_fd.entrylength > sizeof(entry) || src_fd.entrylength < 0) { + err = -EIO; + goto out; + } hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset, src_fd.entrylength); diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 7c69b98a2e45..2b3b8611b41b 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -79,6 +79,11 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) filp->f_pos++; /* fall through */ case 1: + if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) { + err = -EIO; + goto out; + } + hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); if (entry.type != HFS_CDR_THD) { printk(KERN_ERR "hfs: bad catalog folder thread\n"); @@ -109,6 +114,12 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) err = -EIO; goto out; } + + if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) { + err = -EIO; + goto out; + } + hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); type = entry.type; len = hfs_mac2asc(sb, strbuf, &fd.key->cat.CName); diff --git a/fs/hfs/super.c b/fs/hfs/super.c index f7fcbe49da72..5ed7252b7b23 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -409,8 +409,13 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) /* try to get the root inode */ hfs_find_init(HFS_SB(sb)->cat_tree, &fd); res = hfs_cat_find_brec(sb, HFS_ROOT_CNID, &fd); - if (!res) + if (!res) { + if (fd.entrylength > sizeof(rec) || fd.entrylength < 0) { + res = -EIO; + goto bail; + } hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, fd.entrylength); + } if (res) { hfs_find_exit(&fd); goto bail_no_root; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index d4cfd6d2779e..8896c1d4febe 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -636,6 +636,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) JBUFFER_TRACE(jh, "ph3: write metadata"); flags = jbd2_journal_write_metadata_buffer(commit_transaction, jh, &new_jh, blocknr); + if (flags < 0) { + jbd2_journal_abort(journal, flags); + continue; + } set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); wbuf[bufs++] = jh2bh(new_jh); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index fed85388ee86..b7ca3a92a4db 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -78,6 +78,7 @@ EXPORT_SYMBOL(jbd2_journal_errno); EXPORT_SYMBOL(jbd2_journal_ack_err); EXPORT_SYMBOL(jbd2_journal_clear_err); EXPORT_SYMBOL(jbd2_log_wait_commit); +EXPORT_SYMBOL(jbd2_log_start_commit); EXPORT_SYMBOL(jbd2_journal_start_commit); EXPORT_SYMBOL(jbd2_journal_force_commit_nested); EXPORT_SYMBOL(jbd2_journal_wipe); @@ -358,6 +359,10 @@ repeat: jbd_unlock_bh_state(bh_in); tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); + if (!tmp) { + jbd2_journal_put_journal_head(new_jh); + return -ENOMEM; + } jbd_lock_bh_state(bh_in); if (jh_in->b_frozen_data) { jbd2_free(tmp, bh_in->b_size); @@ -1248,6 +1253,13 @@ int jbd2_journal_load(journal_t *journal) if (jbd2_journal_recover(journal)) goto recovery_error; + if (journal->j_failed_commit) { + printk(KERN_ERR "JBD2: journal transaction %u on %s " + "is corrupt.\n", journal->j_failed_commit, + journal->j_devname); + return -EIO; + } + /* OK, we've finished with the dynamic journal bits: * reinitialise the dynamic contents of the superblock in memory * and reset them on disk. */ diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c index 090c556ffed2..3b6f2fa12cff 100644 --- a/fs/jffs2/gc.c +++ b/fs/jffs2/gc.c @@ -700,7 +700,8 @@ static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_ struct jffs2_raw_inode ri; struct jffs2_node_frag *last_frag; union jffs2_device_node dev; - char *mdata = NULL, mdatalen = 0; + char *mdata = NULL; + int mdatalen = 0; uint32_t alloclen, ilen; int ret; diff --git a/fs/namei.c b/fs/namei.c index d11f404667e9..a2b3c28a499d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -234,6 +234,7 @@ int generic_permission(struct inode *inode, int mask, /* * Searching includes executable on directories, else just read. */ + mask &= MAY_READ | MAY_WRITE | MAY_EXEC; if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) if (capable(CAP_DAC_READ_SEARCH)) return 0; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e1d415e97849..0d289823e856 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -342,6 +342,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, data->res.fattr = &data->fattr; data->res.eof = 0; data->res.count = bytes; + nfs_fattr_init(&data->fattr); msg.rpc_argp = &data->args; msg.rpc_resp = &data->res; @@ -575,6 +576,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; + nfs_fattr_init(&data->fattr); NFS_PROTO(data->inode)->commit_setup(data, &msg); @@ -766,6 +768,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, data->res.fattr = &data->fattr; data->res.count = bytes; data->res.verf = &data->verf; + nfs_fattr_init(&data->fattr); task_setup_data.task = &data->task; task_setup_data.callback_data = data; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f5fdd39e037a..393d40fd7eb9 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -486,6 +486,8 @@ static int nfs_release_page(struct page *page, gfp_t gfp) { dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); + if (gfp & __GFP_WAIT) + nfs_wb_page(page->mapping->host, page); /* If PagePrivate() is set, then the page is not freeable */ if (PagePrivate(page)) return 0; diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index fa588006588d..237874f1af23 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -354,12 +354,11 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode) */ int nfs_fscache_release_page(struct page *page, gfp_t gfp) { - struct nfs_inode *nfsi = NFS_I(page->mapping->host); - struct fscache_cookie *cookie = nfsi->fscache; - - BUG_ON(!cookie); - if (PageFsCache(page)) { + struct nfs_inode *nfsi = NFS_I(page->mapping->host); + struct fscache_cookie *cookie = nfsi->fscache; + + BUG_ON(!cookie); dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", cookie, page, nfsi); diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 0adefc40cc89..59047f8d7d72 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -120,7 +120,7 @@ static struct { { .status = MNT3ERR_INVAL, .errno = -EINVAL, }, { .status = MNT3ERR_NAMETOOLONG, .errno = -ENAMETOOLONG, }, { .status = MNT3ERR_NOTSUPP, .errno = -ENOTSUPP, }, - { .status = MNT3ERR_SERVERFAULT, .errno = -ESERVERFAULT, }, + { .status = MNT3ERR_SERVERFAULT, .errno = -EREMOTEIO, }, }; struct mountres { diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 5e078b222b4e..7bc2da8efd4a 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -699,7 +699,7 @@ static struct { { NFSERR_BAD_COOKIE, -EBADCOOKIE }, { NFSERR_NOTSUPP, -ENOTSUPP }, { NFSERR_TOOSMALL, -ETOOSMALL }, - { NFSERR_SERVERFAULT, -ESERVERFAULT }, + { NFSERR_SERVERFAULT, -EREMOTEIO }, { NFSERR_BADTYPE, -EBADTYPE }, { NFSERR_JUKEBOX, -EJUKEBOX }, { -1, -EIO } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 6ea07a3c75d4..b4a6b1a586d6 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -141,6 +141,7 @@ enum { NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */ NFS_STATE_RECLAIM_REBOOT, /* OPEN stateid server rebooted */ NFS_STATE_RECLAIM_NOGRACE, /* OPEN stateid needs to recover state */ + NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */ }; struct nfs4_state { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 741a562177fc..6c200595099f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1573,6 +1573,8 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in status = PTR_ERR(state); if (IS_ERR(state)) goto err_opendata_put; + if ((opendata->o_res.rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) != 0) + set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); *res = state; @@ -3976,6 +3978,22 @@ static const struct rpc_call_ops nfs4_lock_ops = { .rpc_release = nfs4_lock_release, }; +static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error) +{ + struct nfs_client *clp = server->nfs_client; + struct nfs4_state *state = lsp->ls_state; + + switch (error) { + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_EXPIRED: + if (new_lock_owner != 0 || + (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) + nfs4_state_mark_reclaim_nograce(clp, state); + lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; + }; +} + static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int reclaim) { struct nfs4_lockdata *data; @@ -4011,6 +4029,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f ret = nfs4_wait_for_completion_rpc_task(task); if (ret == 0) { ret = data->rpc_status; + if (ret) + nfs4_handle_setlk_error(data->server, data->lsp, + data->arg.new_lock_owner, ret); } else data->cancelled = 1; rpc_put_task(task); @@ -4060,8 +4081,11 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock { struct nfs_inode *nfsi = NFS_I(state->inode); unsigned char fl_flags = request->fl_flags; - int status; + int status = -ENOLCK; + if ((fl_flags & FL_POSIX) && + !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags)) + goto out; /* Is this a delegated open? */ status = nfs4_set_lock_state(state, request); if (status != 0) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 20b4e30e6c82..a4cd1b79b71d 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4554,7 +4554,7 @@ static int decode_sequence(struct xdr_stream *xdr, * If the server returns different values for sessionID, slotID or * sequence number, the server is looney tunes. */ - status = -ESERVERFAULT; + status = -EREMOTEIO; if (memcmp(id.data, res->sr_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) { @@ -5678,7 +5678,7 @@ static struct { { NFS4ERR_BAD_COOKIE, -EBADCOOKIE }, { NFS4ERR_NOTSUPP, -ENOTSUPP }, { NFS4ERR_TOOSMALL, -ETOOSMALL }, - { NFS4ERR_SERVERFAULT, -ESERVERFAULT }, + { NFS4ERR_SERVERFAULT, -EREMOTEIO }, { NFS4ERR_BADTYPE, -EBADTYPE }, { NFS4ERR_LOCKED, -EAGAIN }, { NFS4ERR_SYMLINK, -ELOOP }, @@ -5705,7 +5705,7 @@ nfs4_stat_to_errno(int stat) } if (stat <= 10000 || stat > 10100) { /* The server is looney tunes. */ - return -ESERVERFAULT; + return -EREMOTEIO; } /* If we cannot translate the error, the recovery routines should * handle it. diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index e2975939126a..a12c45b65dd4 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -176,6 +176,12 @@ void nfs_release_request(struct nfs_page *req) kref_put(&req->wb_kref, nfs_free_request); } +static int nfs_wait_bit_uninterruptible(void *word) +{ + io_schedule(); + return 0; +} + /** * nfs_wait_on_request - Wait for a request to complete. * @req: request to wait upon. @@ -186,14 +192,9 @@ void nfs_release_request(struct nfs_page *req) int nfs_wait_on_request(struct nfs_page *req) { - int ret = 0; - - if (!test_bit(PG_BUSY, &req->wb_flags)) - goto out; - ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY, - nfs_wait_bit_killable, TASK_KILLABLE); -out: - return ret; + return wait_on_bit(&req->wb_flags, PG_BUSY, + nfs_wait_bit_uninterruptible, + TASK_UNINTERRUPTIBLE); } /** diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 90be551b80c1..4bf23f6f93cf 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -241,6 +241,7 @@ static int nfs_show_stats(struct seq_file *, struct vfsmount *); static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static void nfs_put_super(struct super_block *); static void nfs_kill_super(struct super_block *); static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); @@ -264,6 +265,7 @@ static const struct super_operations nfs_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, .write_inode = nfs_write_inode, + .put_super = nfs_put_super, .statfs = nfs_statfs, .clear_inode = nfs_clear_inode, .umount_begin = nfs_umount_begin, @@ -333,6 +335,7 @@ static const struct super_operations nfs4_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, .write_inode = nfs_write_inode, + .put_super = nfs_put_super, .statfs = nfs_statfs, .clear_inode = nfs4_clear_inode, .umount_begin = nfs_umount_begin, @@ -734,8 +737,6 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve data = kzalloc(sizeof(*data), GFP_KERNEL); if (data) { - data->rsize = NFS_MAX_FILE_IO_SIZE; - data->wsize = NFS_MAX_FILE_IO_SIZE; data->acregmin = NFS_DEF_ACREGMIN; data->acregmax = NFS_DEF_ACREGMAX; data->acdirmin = NFS_DEF_ACDIRMIN; @@ -2198,6 +2199,17 @@ error_splat_super: } /* + * Ensure that we unregister the bdi before kill_anon_super + * releases the device name + */ +static void nfs_put_super(struct super_block *s) +{ + struct nfs_server *server = NFS_SB(s); + + bdi_unregister(&server->backing_dev_info); +} + +/* * Destroy an NFS2/3 superblock */ static void nfs_kill_super(struct super_block *s) @@ -2205,7 +2217,6 @@ static void nfs_kill_super(struct super_block *s) struct nfs_server *server = NFS_SB(s); kill_anon_super(s); - bdi_unregister(&server->backing_dev_info); nfs_fscache_release_super_cookie(s); nfs_free_server(server); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 53eb26c16b50..cf6c06f66edc 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1542,6 +1542,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) break; } ret = nfs_wait_on_request(req); + nfs_release_request(req); if (ret < 0) goto out; } @@ -1612,15 +1613,16 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, if (ret) goto out_unlock; page_cache_get(newpage); + spin_lock(&mapping->host->i_lock); req->wb_page = newpage; SetPagePrivate(newpage); - set_page_private(newpage, page_private(page)); + set_page_private(newpage, (unsigned long)req); ClearPagePrivate(page); set_page_private(page, 0); + spin_unlock(&mapping->host->i_lock); page_cache_release(page); out_unlock: nfs_clear_page_tag_locked(req); - nfs_release_request(req); out: return ret; } diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 725d02f210e2..6d9c6aabc85e 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -389,7 +389,7 @@ sort_pacl(struct posix_acl *pacl) sort_pacl_range(pacl, 1, i-1); BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ); - j = i++; + j = ++i; while (pacl->a_entries[j].e_tag == ACL_GROUP) j++; sort_pacl_range(pacl, i, j-1); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a293f0273263..570dd1c103d5 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -774,12 +774,9 @@ static inline int nfsd_dosync(struct file *filp, struct dentry *dp, int (*fsync) (struct file *, struct dentry *, int); int err; - err = filemap_fdatawrite(inode->i_mapping); + err = filemap_write_and_wait(inode->i_mapping); if (err == 0 && fop && (fsync = fop->fsync)) err = fsync(filp, dp, 0); - if (err == 0) - err = filemap_fdatawait(inode->i_mapping); - return err; } diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index c9ee67b442e1..1afb0a10229f 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -121,7 +121,7 @@ static int idr_callback(int id, void *p, void *data) if (warned) return 0; - warned = false; + warned = true; entry = p; ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index dcd2040d330c..ca44337b06ce 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -558,7 +558,7 @@ retry: spin_lock(&group->inotify_data.idr_lock); ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, - group->inotify_data.last_wd, + group->inotify_data.last_wd+1, &tmp_ientry->wd); spin_unlock(&group->inotify_data.idr_lock); if (ret) { @@ -638,7 +638,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign spin_lock_init(&group->inotify_data.idr_lock); idr_init(&group->inotify_data.idr); - group->inotify_data.last_wd = 1; + group->inotify_data.last_wd = 0; group->inotify_data.user = user; group->inotify_data.fa = NULL; diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c index 038a6022152f..49cfd5f54238 100644 --- a/fs/partitions/efi.c +++ b/fs/partitions/efi.c @@ -1,7 +1,9 @@ /************************************************************ * EFI GUID Partition Table handling - * Per Intel EFI Specification v1.02 - * http://developer.intel.com/technology/efi/efi.htm + * + * http://www.uefi.org/specs/ + * http://www.intel.com/technology/efi/ + * * efi.[ch] by Matt Domsch <Matt_Domsch@dell.com> * Copyright 2000,2001,2002,2004 Dell Inc. * @@ -92,6 +94,7 @@ * ************************************************************/ #include <linux/crc32.h> +#include <linux/math64.h> #include "check.h" #include "efi.h" @@ -141,7 +144,8 @@ last_lba(struct block_device *bdev) { if (!bdev || !bdev->bd_inode) return 0; - return (bdev->bd_inode->i_size >> 9) - 1ULL; + return div_u64(bdev->bd_inode->i_size, + bdev_logical_block_size(bdev)) - 1ULL; } static inline int @@ -188,6 +192,7 @@ static size_t read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count) { size_t totalreadcount = 0; + sector_t n = lba * (bdev_logical_block_size(bdev) / 512); if (!bdev || !buffer || lba > last_lba(bdev)) return 0; @@ -195,7 +200,7 @@ read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count) while (count) { int copied = 512; Sector sect; - unsigned char *data = read_dev_sector(bdev, lba++, §); + unsigned char *data = read_dev_sector(bdev, n++, §); if (!data) break; if (copied > count) @@ -257,15 +262,16 @@ static gpt_header * alloc_read_gpt_header(struct block_device *bdev, u64 lba) { gpt_header *gpt; + unsigned ssz = bdev_logical_block_size(bdev); + if (!bdev) return NULL; - gpt = kzalloc(sizeof (gpt_header), GFP_KERNEL); + gpt = kzalloc(ssz, GFP_KERNEL); if (!gpt) return NULL; - if (read_lba(bdev, lba, (u8 *) gpt, - sizeof (gpt_header)) < sizeof (gpt_header)) { + if (read_lba(bdev, lba, (u8 *) gpt, ssz) < ssz) { kfree(gpt); gpt=NULL; return NULL; @@ -601,6 +607,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev) gpt_header *gpt = NULL; gpt_entry *ptes = NULL; u32 i; + unsigned ssz = bdev_logical_block_size(bdev) / 512; if (!find_valid_gpt(bdev, &gpt, &ptes) || !gpt || !ptes) { kfree(gpt); @@ -611,13 +618,14 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev) pr_debug("GUID Partition Table is valid! Yea!\n"); for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) { + u64 start = le64_to_cpu(ptes[i].starting_lba); + u64 size = le64_to_cpu(ptes[i].ending_lba) - + le64_to_cpu(ptes[i].starting_lba) + 1ULL; + if (!is_pte_valid(&ptes[i], last_lba(bdev))) continue; - put_partition(state, i+1, le64_to_cpu(ptes[i].starting_lba), - (le64_to_cpu(ptes[i].ending_lba) - - le64_to_cpu(ptes[i].starting_lba) + - 1ULL)); + put_partition(state, i+1, start * ssz, size * ssz); /* If this is a RAID volume, tell md */ if (!efi_guidcmp(ptes[i].partition_type_guid, diff --git a/fs/partitions/efi.h b/fs/partitions/efi.h index 2cc89d0475bf..6998b589abf9 100644 --- a/fs/partitions/efi.h +++ b/fs/partitions/efi.h @@ -37,7 +37,6 @@ #define EFI_PMBR_OSTYPE_EFI 0xEF #define EFI_PMBR_OSTYPE_EFI_GPT 0xEE -#define GPT_BLOCK_SIZE 512 #define GPT_HEADER_SIGNATURE 0x5452415020494645ULL #define GPT_HEADER_REVISION_V1 0x00010000 #define GPT_PRIMARY_PARTITION_TABLE_LBA 1 @@ -79,7 +78,12 @@ typedef struct _gpt_header { __le32 num_partition_entries; __le32 sizeof_partition_entry; __le32 partition_entry_array_crc32; - u8 reserved2[GPT_BLOCK_SIZE - 92]; + + /* The rest of the logical block is reserved by UEFI and must be zero. + * EFI standard handles this by: + * + * uint8_t reserved2[ BlockSize - 92 ]; + */ } __attribute__ ((packed)) gpt_header; typedef struct _gpt_entry_attributes { diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 39b49c42a7ed..253498739978 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1388,6 +1388,70 @@ void vfs_dq_drop(struct inode *inode) EXPORT_SYMBOL(vfs_dq_drop); /* + * inode_reserved_space is managed internally by quota, and protected by + * i_lock similar to i_blocks+i_bytes. + */ +static qsize_t *inode_reserved_space(struct inode * inode) +{ + /* Filesystem must explicitly define it's own method in order to use + * quota reservation interface */ + BUG_ON(!inode->i_sb->dq_op->get_reserved_space); + return inode->i_sb->dq_op->get_reserved_space(inode); +} + +static void inode_add_rsv_space(struct inode *inode, qsize_t number) +{ + spin_lock(&inode->i_lock); + *inode_reserved_space(inode) += number; + spin_unlock(&inode->i_lock); +} + + +static void inode_claim_rsv_space(struct inode *inode, qsize_t number) +{ + spin_lock(&inode->i_lock); + *inode_reserved_space(inode) -= number; + __inode_add_bytes(inode, number); + spin_unlock(&inode->i_lock); +} + +static void inode_sub_rsv_space(struct inode *inode, qsize_t number) +{ + spin_lock(&inode->i_lock); + *inode_reserved_space(inode) -= number; + spin_unlock(&inode->i_lock); +} + +static qsize_t inode_get_rsv_space(struct inode *inode) +{ + qsize_t ret; + + if (!inode->i_sb->dq_op->get_reserved_space) + return 0; + spin_lock(&inode->i_lock); + ret = *inode_reserved_space(inode); + spin_unlock(&inode->i_lock); + return ret; +} + +static void inode_incr_space(struct inode *inode, qsize_t number, + int reserve) +{ + if (reserve) + inode_add_rsv_space(inode, number); + else + inode_add_bytes(inode, number); +} + +static void inode_decr_space(struct inode *inode, qsize_t number, int reserve) +{ + if (reserve) + inode_sub_rsv_space(inode, number); + else + inode_sub_bytes(inode, number); +} + +/* * Following four functions update i_blocks+i_bytes fields and * quota information (together with appropriate checks) * NOTE: We absolutely rely on the fact that caller dirties @@ -1405,6 +1469,21 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int cnt, ret = QUOTA_OK; char warntype[MAXQUOTAS]; + /* + * First test before acquiring mutex - solves deadlocks when we + * re-enter the quota code and are already holding the mutex + */ + if (IS_NOQUOTA(inode)) { + inode_incr_space(inode, number, reserve); + goto out; + } + + down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + if (IS_NOQUOTA(inode)) { + inode_incr_space(inode, number, reserve); + goto out_unlock; + } + for (cnt = 0; cnt < MAXQUOTAS; cnt++) warntype[cnt] = QUOTA_NL_NOWARN; @@ -1415,7 +1494,8 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt) == NO_QUOTA) { ret = NO_QUOTA; - goto out_unlock; + spin_unlock(&dq_data_lock); + goto out_flush_warn; } } for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1426,64 +1506,32 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, else dquot_incr_space(inode->i_dquot[cnt], number); } - if (!reserve) - inode_add_bytes(inode, number); -out_unlock: + inode_incr_space(inode, number, reserve); spin_unlock(&dq_data_lock); - flush_warnings(inode->i_dquot, warntype); - return ret; -} - -int dquot_alloc_space(struct inode *inode, qsize_t number, int warn) -{ - int cnt, ret = QUOTA_OK; - - /* - * First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex - */ - if (IS_NOQUOTA(inode)) { - inode_add_bytes(inode, number); - goto out; - } - - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - if (IS_NOQUOTA(inode)) { - inode_add_bytes(inode, number); - goto out_unlock; - } - - ret = __dquot_alloc_space(inode, number, warn, 0); - if (ret == NO_QUOTA) - goto out_unlock; + if (reserve) + goto out_flush_warn; /* Dirtify all the dquots - this can block when journalling */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (inode->i_dquot[cnt]) mark_dquot_dirty(inode->i_dquot[cnt]); +out_flush_warn: + flush_warnings(inode->i_dquot, warntype); out_unlock: up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); out: return ret; } + +int dquot_alloc_space(struct inode *inode, qsize_t number, int warn) +{ + return __dquot_alloc_space(inode, number, warn, 0); +} EXPORT_SYMBOL(dquot_alloc_space); int dquot_reserve_space(struct inode *inode, qsize_t number, int warn) { - int ret = QUOTA_OK; - - if (IS_NOQUOTA(inode)) - goto out; - - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - if (IS_NOQUOTA(inode)) - goto out_unlock; - - ret = __dquot_alloc_space(inode, number, warn, 1); -out_unlock: - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); -out: - return ret; + return __dquot_alloc_space(inode, number, warn, 1); } EXPORT_SYMBOL(dquot_reserve_space); @@ -1540,14 +1588,14 @@ int dquot_claim_space(struct inode *inode, qsize_t number) int ret = QUOTA_OK; if (IS_NOQUOTA(inode)) { - inode_add_bytes(inode, number); + inode_claim_rsv_space(inode, number); goto out; } down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); if (IS_NOQUOTA(inode)) { up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - inode_add_bytes(inode, number); + inode_claim_rsv_space(inode, number); goto out; } @@ -1559,7 +1607,7 @@ int dquot_claim_space(struct inode *inode, qsize_t number) number); } /* Update inode bytes */ - inode_add_bytes(inode, number); + inode_claim_rsv_space(inode, number); spin_unlock(&dq_data_lock); /* Dirtify all the dquots - this can block when journalling */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) @@ -1572,38 +1620,9 @@ out: EXPORT_SYMBOL(dquot_claim_space); /* - * Release reserved quota space - */ -void dquot_release_reserved_space(struct inode *inode, qsize_t number) -{ - int cnt; - - if (IS_NOQUOTA(inode)) - goto out; - - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - if (IS_NOQUOTA(inode)) - goto out_unlock; - - spin_lock(&dq_data_lock); - /* Release reserved dquots */ - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (inode->i_dquot[cnt]) - dquot_free_reserved_space(inode->i_dquot[cnt], number); - } - spin_unlock(&dq_data_lock); - -out_unlock: - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); -out: - return; -} -EXPORT_SYMBOL(dquot_release_reserved_space); - -/* * This operation can block, but only after everything is updated */ -int dquot_free_space(struct inode *inode, qsize_t number) +int __dquot_free_space(struct inode *inode, qsize_t number, int reserve) { unsigned int cnt; char warntype[MAXQUOTAS]; @@ -1612,7 +1631,7 @@ int dquot_free_space(struct inode *inode, qsize_t number) * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) { out_sub: - inode_sub_bytes(inode, number); + inode_decr_space(inode, number, reserve); return QUOTA_OK; } @@ -1627,21 +1646,43 @@ out_sub: if (!inode->i_dquot[cnt]) continue; warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number); - dquot_decr_space(inode->i_dquot[cnt], number); + if (reserve) + dquot_free_reserved_space(inode->i_dquot[cnt], number); + else + dquot_decr_space(inode->i_dquot[cnt], number); } - inode_sub_bytes(inode, number); + inode_decr_space(inode, number, reserve); spin_unlock(&dq_data_lock); + + if (reserve) + goto out_unlock; /* Dirtify all the dquots - this can block when journalling */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (inode->i_dquot[cnt]) mark_dquot_dirty(inode->i_dquot[cnt]); +out_unlock: flush_warnings(inode->i_dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); return QUOTA_OK; } + +int dquot_free_space(struct inode *inode, qsize_t number) +{ + return __dquot_free_space(inode, number, 0); +} EXPORT_SYMBOL(dquot_free_space); /* + * Release reserved quota space + */ +void dquot_release_reserved_space(struct inode *inode, qsize_t number) +{ + __dquot_free_space(inode, number, 1); + +} +EXPORT_SYMBOL(dquot_release_reserved_space); + +/* * This operation can block, but only after everything is updated */ int dquot_free_inode(const struct inode *inode, qsize_t number) @@ -1679,19 +1720,6 @@ int dquot_free_inode(const struct inode *inode, qsize_t number) EXPORT_SYMBOL(dquot_free_inode); /* - * call back function, get reserved quota space from underlying fs - */ -qsize_t dquot_get_reserved_space(struct inode *inode) -{ - qsize_t reserved_space = 0; - - if (sb_any_quota_active(inode->i_sb) && - inode->i_sb->dq_op->get_reserved_space) - reserved_space = inode->i_sb->dq_op->get_reserved_space(inode); - return reserved_space; -} - -/* * Transfer the number of inode and blocks from one diskquota to an other. * * This operation can block, but only after everything is updated @@ -1734,7 +1762,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) } spin_lock(&dq_data_lock); cur_space = inode_get_bytes(inode); - rsv_space = dquot_get_reserved_space(inode); + rsv_space = inode_get_rsv_space(inode); space = cur_space + rsv_space; /* Build the transfer_from list and check the limits */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a14d6cd9eeda..d240c15bf086 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2531,6 +2531,12 @@ static int reiserfs_writepage(struct page *page, struct writeback_control *wbc) return reiserfs_write_full_page(page, wbc); } +static void reiserfs_truncate_failed_write(struct inode *inode) +{ + truncate_inode_pages(inode->i_mapping, inode->i_size); + reiserfs_truncate_file(inode, 0); +} + static int reiserfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, @@ -2597,6 +2603,8 @@ static int reiserfs_write_begin(struct file *file, if (ret) { unlock_page(page); page_cache_release(page); + /* Truncate allocated blocks */ + reiserfs_truncate_failed_write(inode); } return ret; } @@ -2689,8 +2697,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, ** transaction tracking stuff when the size changes. So, we have ** to do the i_size updates here. */ - pos += copied; - if (pos > inode->i_size) { + if (pos + copied > inode->i_size) { struct reiserfs_transaction_handle myth; reiserfs_write_lock(inode->i_sb); /* If the file have grown beyond the border where it @@ -2708,7 +2715,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, goto journal_error; } reiserfs_update_inode_transaction(inode); - inode->i_size = pos; + inode->i_size = pos + copied; /* * this will just nest into our transaction. It's important * to use mark_inode_dirty so the inode gets pushed around on the @@ -2735,6 +2742,10 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, out: unlock_page(page); page_cache_release(page); + + if (pos + len > inode->i_size) + reiserfs_truncate_failed_write(inode); + return ret == 0 ? copied : ret; journal_error: diff --git a/fs/romfs/super.c b/fs/romfs/super.c index c117fa80d1e9..42d213546894 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -544,6 +544,7 @@ error: error_rsb_inval: ret = -EINVAL; error_rsb: + kfree(rsb); return ret; } diff --git a/fs/stat.c b/fs/stat.c index 075694e31d8b..c4ecd52c5737 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -401,9 +401,9 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename, } #endif /* __ARCH_WANT_STAT64 */ -void inode_add_bytes(struct inode *inode, loff_t bytes) +/* Caller is here responsible for sufficient locking (ie. inode->i_lock) */ +void __inode_add_bytes(struct inode *inode, loff_t bytes) { - spin_lock(&inode->i_lock); inode->i_blocks += bytes >> 9; bytes &= 511; inode->i_bytes += bytes; @@ -411,6 +411,12 @@ void inode_add_bytes(struct inode *inode, loff_t bytes) inode->i_blocks++; inode->i_bytes -= 512; } +} + +void inode_add_bytes(struct inode *inode, loff_t bytes) +{ + spin_lock(&inode->i_lock); + __inode_add_bytes(inode, bytes); spin_unlock(&inode->i_lock); } diff --git a/fs/super.c b/fs/super.c index 19eb70b374bc..aff046b0fe78 100644 --- a/fs/super.c +++ b/fs/super.c @@ -901,8 +901,9 @@ int get_sb_single(struct file_system_type *fs_type, return error; } s->s_flags |= MS_ACTIVE; + } else { + do_remount_sb(s, flags, data, 0); } - do_remount_sb(s, flags, data, 0); simple_set_mnt(mnt, s); return 0; } diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index e28cecf179f5..02a022a3d473 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -94,30 +94,29 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) if (!sd_attrs) return -ENOMEM; sd->s_iattr = sd_attrs; - } else { - /* attributes were changed at least once in past */ - iattrs = &sd_attrs->ia_iattr; - - if (ia_valid & ATTR_UID) - iattrs->ia_uid = iattr->ia_uid; - if (ia_valid & ATTR_GID) - iattrs->ia_gid = iattr->ia_gid; - if (ia_valid & ATTR_ATIME) - iattrs->ia_atime = timespec_trunc(iattr->ia_atime, - inode->i_sb->s_time_gran); - if (ia_valid & ATTR_MTIME) - iattrs->ia_mtime = timespec_trunc(iattr->ia_mtime, - inode->i_sb->s_time_gran); - if (ia_valid & ATTR_CTIME) - iattrs->ia_ctime = timespec_trunc(iattr->ia_ctime, - inode->i_sb->s_time_gran); - if (ia_valid & ATTR_MODE) { - umode_t mode = iattr->ia_mode; - - if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) - mode &= ~S_ISGID; - iattrs->ia_mode = sd->s_mode = mode; - } + } + /* attributes were changed at least once in past */ + iattrs = &sd_attrs->ia_iattr; + + if (ia_valid & ATTR_UID) + iattrs->ia_uid = iattr->ia_uid; + if (ia_valid & ATTR_GID) + iattrs->ia_gid = iattr->ia_gid; + if (ia_valid & ATTR_ATIME) + iattrs->ia_atime = timespec_trunc(iattr->ia_atime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_MTIME) + iattrs->ia_mtime = timespec_trunc(iattr->ia_mtime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_CTIME) + iattrs->ia_ctime = timespec_trunc(iattr->ia_ctime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_MODE) { + umode_t mode = iattr->ia_mode; + + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + mode &= ~S_ISGID; + iattrs->ia_mode = sd->s_mode = mode; } return error; } diff --git a/fs/udf/super.c b/fs/udf/super.c index 9d1b8c2e6c45..1e4543cbcd27 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1078,21 +1078,39 @@ static int udf_fill_partdesc_info(struct super_block *sb, return 0; } -static int udf_load_vat(struct super_block *sb, int p_index, int type1_index) +static void udf_find_vat_block(struct super_block *sb, int p_index, + int type1_index, sector_t start_block) { struct udf_sb_info *sbi = UDF_SB(sb); struct udf_part_map *map = &sbi->s_partmaps[p_index]; + sector_t vat_block; struct kernel_lb_addr ino; + + /* + * VAT file entry is in the last recorded block. Some broken disks have + * it a few blocks before so try a bit harder... + */ + ino.partitionReferenceNum = type1_index; + for (vat_block = start_block; + vat_block >= map->s_partition_root && + vat_block >= start_block - 3 && + !sbi->s_vat_inode; vat_block--) { + ino.logicalBlockNum = vat_block - map->s_partition_root; + sbi->s_vat_inode = udf_iget(sb, &ino); + } +} + +static int udf_load_vat(struct super_block *sb, int p_index, int type1_index) +{ + struct udf_sb_info *sbi = UDF_SB(sb); + struct udf_part_map *map = &sbi->s_partmaps[p_index]; struct buffer_head *bh = NULL; struct udf_inode_info *vati; uint32_t pos; struct virtualAllocationTable20 *vat20; sector_t blocks = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; - /* VAT file entry is in the last recorded block */ - ino.partitionReferenceNum = type1_index; - ino.logicalBlockNum = sbi->s_last_block - map->s_partition_root; - sbi->s_vat_inode = udf_iget(sb, &ino); + udf_find_vat_block(sb, p_index, type1_index, sbi->s_last_block); if (!sbi->s_vat_inode && sbi->s_last_block != blocks - 1) { printk(KERN_NOTICE "UDF-fs: Failed to read VAT inode from the" @@ -1100,9 +1118,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index) "block of the device (%lu).\n", (unsigned long)sbi->s_last_block, (unsigned long)blocks - 1); - ino.partitionReferenceNum = type1_index; - ino.logicalBlockNum = blocks - 1 - map->s_partition_root; - sbi->s_vat_inode = udf_iget(sb, &ino); + udf_find_vat_block(sb, p_index, type1_index, blocks - 1); } if (!sbi->s_vat_inode) return 1; |