diff options
author | Varun Wadekar <vwadekar@nvidia.com> | 2012-08-13 14:23:04 +0530 |
---|---|---|
committer | Varun Wadekar <vwadekar@nvidia.com> | 2012-08-13 14:23:04 +0530 |
commit | 82cfe176157cf926133dea3a8879135bf20b9af2 (patch) | |
tree | 8efe310a6dd0d411399ce299aecfaf3bd96de393 /fs | |
parent | 564e6b431e9696d6b7186d120d563a74ae290092 (diff) | |
parent | 8067fa23092f2c4e18c29c90f7d5bb765dbf8954 (diff) |
Merge commit 'v3.4.8' into android-t114-3.4-rebased
Linux v3.4.8
Conflicts:
drivers/net/tun.c
kernel/power/suspend.c
Change-Id: Ia26546425cd20f127dbf4dd58cfca41bda47d23d
Signed-off-by: Varun Wadekar <vwadekar@nvidia.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/async-thread.c | 9 | ||||
-rw-r--r-- | fs/cifs/cifssmb.c | 30 | ||||
-rw-r--r-- | fs/cifs/connect.c | 27 | ||||
-rw-r--r-- | fs/cifs/readdir.c | 7 | ||||
-rw-r--r-- | fs/exec.c | 2 | ||||
-rw-r--r-- | fs/exofs/ore.c | 8 | ||||
-rw-r--r-- | fs/exofs/ore_raid.c | 67 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 3 | ||||
-rw-r--r-- | fs/ext4/bitmap.c | 12 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 6 | ||||
-rw-r--r-- | fs/ext4/extents.c | 46 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 3 | ||||
-rw-r--r-- | fs/ext4/inode.c | 41 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 1 | ||||
-rw-r--r-- | fs/ext4/resize.c | 7 | ||||
-rw-r--r-- | fs/ext4/super.c | 174 | ||||
-rw-r--r-- | fs/locks.c | 6 | ||||
-rw-r--r-- | fs/nfs/file.c | 7 | ||||
-rw-r--r-- | fs/nfs/idmap.c | 26 | ||||
-rw-r--r-- | fs/nfs/objlayout/objio_osd.c | 25 | ||||
-rw-r--r-- | fs/nfsd/nfs4xdr.c | 2 | ||||
-rw-r--r-- | fs/select.c | 10 | ||||
-rw-r--r-- | fs/ubifs/sb.c | 8 | ||||
-rw-r--r-- | fs/udf/super.c | 2 |
24 files changed, 364 insertions, 165 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 42704149b723..58b7d14b08ee 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -206,10 +206,17 @@ static noinline void run_ordered_completions(struct btrfs_workers *workers, work->ordered_func(work); - /* now take the lock again and call the freeing code */ + /* now take the lock again and drop our item from the list */ spin_lock(&workers->order_lock); list_del(&work->order_list); + spin_unlock(&workers->order_lock); + + /* + * we don't want to call the ordered free functions + * with the lock held though + */ work->ordered_free(work); + spin_lock(&workers->order_lock); } spin_unlock(&workers->order_lock); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 6b79efd7d75f..3a75ee5a6b33 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -89,6 +89,32 @@ static struct { /* Forward declarations */ static void cifs_readv_complete(struct work_struct *work); +#ifdef CONFIG_HIGHMEM +/* + * On arches that have high memory, kmap address space is limited. By + * serializing the kmap operations on those arches, we ensure that we don't + * end up with a bunch of threads in writeback with partially mapped page + * arrays, stuck waiting for kmap to come back. That situation prevents + * progress and can deadlock. + */ +static DEFINE_MUTEX(cifs_kmap_mutex); + +static inline void +cifs_kmap_lock(void) +{ + mutex_lock(&cifs_kmap_mutex); +} + +static inline void +cifs_kmap_unlock(void) +{ + mutex_unlock(&cifs_kmap_mutex); +} +#else /* !CONFIG_HIGHMEM */ +#define cifs_kmap_lock() do { ; } while(0) +#define cifs_kmap_unlock() do { ; } while(0) +#endif /* CONFIG_HIGHMEM */ + /* Mark as invalid, all open files on tree connections since they were closed when session to server was lost */ static void mark_open_files_invalid(struct cifs_tcon *pTcon) @@ -1557,6 +1583,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0; cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index); + cifs_kmap_lock(); list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { if (remaining >= PAGE_CACHE_SIZE) { /* enough data to fill the page */ @@ -1606,6 +1633,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) page_cache_release(page); } } + cifs_kmap_unlock(); /* issue the read if we have any iovecs left to fill */ if (rdata->nr_iov > 1) { @@ -2194,7 +2222,9 @@ cifs_async_writev(struct cifs_writedata *wdata) * and set the iov_len properly for each one. It may also set * wdata->bytes too. */ + cifs_kmap_lock(); wdata->marshal_iov(iov, wdata); + cifs_kmap_unlock(); cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 402fa0f9bc01..65a78e9a5d40 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -238,8 +238,8 @@ static const match_table_t cifs_mount_option_tokens = { enum { Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p, Opt_sec_ntlmsspi, Opt_sec_ntlmssp, - Opt_ntlm, Opt_sec_ntlmi, Opt_sec_ntlmv2i, - Opt_sec_nontlm, Opt_sec_lanman, + Opt_ntlm, Opt_sec_ntlmi, Opt_sec_ntlmv2, + Opt_sec_ntlmv2i, Opt_sec_lanman, Opt_sec_none, Opt_sec_err @@ -253,8 +253,9 @@ static const match_table_t cifs_secflavor_tokens = { { Opt_sec_ntlmssp, "ntlmssp" }, { Opt_ntlm, "ntlm" }, { Opt_sec_ntlmi, "ntlmi" }, + { Opt_sec_ntlmv2, "nontlm" }, + { Opt_sec_ntlmv2, "ntlmv2" }, { Opt_sec_ntlmv2i, "ntlmv2i" }, - { Opt_sec_nontlm, "nontlm" }, { Opt_sec_lanman, "lanman" }, { Opt_sec_none, "none" }, @@ -1163,7 +1164,7 @@ static int cifs_parse_security_flavors(char *value, case Opt_sec_ntlmi: vol->secFlg |= CIFSSEC_MAY_NTLM | CIFSSEC_MUST_SIGN; break; - case Opt_sec_nontlm: + case Opt_sec_ntlmv2: vol->secFlg |= CIFSSEC_MAY_NTLMV2; break; case Opt_sec_ntlmv2i: @@ -3348,6 +3349,18 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) #define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) +/* + * On hosts with high memory, we can't currently support wsize/rsize that are + * larger than we can kmap at once. Cap the rsize/wsize at + * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request + * larger than that anyway. + */ +#ifdef CONFIG_HIGHMEM +#define CIFS_KMAP_SIZE_LIMIT (LAST_PKMAP * PAGE_CACHE_SIZE) +#else /* CONFIG_HIGHMEM */ +#define CIFS_KMAP_SIZE_LIMIT (1<<24) +#endif /* CONFIG_HIGHMEM */ + static unsigned int cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) { @@ -3378,6 +3391,9 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) wsize = min_t(unsigned int, wsize, server->maxBuf - sizeof(WRITE_REQ) + 4); + /* limit to the amount that we can kmap at once */ + wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT); + /* hard limit of CIFS_MAX_WSIZE */ wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); @@ -3419,6 +3435,9 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) if (!(server->capabilities & CAP_LARGE_READ_X)) rsize = min_t(unsigned int, CIFSMaxBufSize, rsize); + /* limit to the amount that we can kmap at once */ + rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT); + /* hard limit of CIFS_MAX_RSIZE */ rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 0a8224d1c4c5..a4217f02fab2 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -86,9 +86,12 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, dentry = d_lookup(parent, name); if (dentry) { - /* FIXME: check for inode number changes? */ - if (dentry->d_inode != NULL) + inode = dentry->d_inode; + /* update inode in place if i_ino didn't change */ + if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) { + cifs_fattr_to_inode(inode, fattr); return dentry; + } d_drop(dentry); dput(dentry); } diff --git a/fs/exec.c b/fs/exec.c index 421e0815f848..9ba382d7e726 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1024,7 +1024,7 @@ static void flush_old_files(struct files_struct * files) unsigned long set, i; j++; - i = j * __NFDBITS; + i = j * BITS_PER_LONG; fdt = files_fdtable(files); if (i >= fdt->max_fds) break; diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index 49cf230554a2..24a49d47e935 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -735,13 +735,7 @@ static int _prepare_for_striping(struct ore_io_state *ios) out: ios->numdevs = devs_in_group; ios->pages_consumed = cur_pg; - if (unlikely(ret)) { - if (length == ios->length) - return ret; - else - ios->length -= length; - } - return 0; + return ret; } int ore_create(struct ore_io_state *ios) diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index d222c77cfa1b..fff2070c6751 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c @@ -461,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) * ios->sp2d[p][*], xor is calculated the same way. These pages are * allocated/freed and don't go through cache */ -static int _read_4_write(struct ore_io_state *ios) +static int _read_4_write_first_stripe(struct ore_io_state *ios) { - struct ore_io_state *ios_read; struct ore_striping_info read_si; struct __stripe_pages_2d *sp2d = ios->sp2d; u64 offset = ios->si.first_stripe_start; - u64 last_stripe_end; - unsigned bytes_in_stripe = ios->si.bytes_in_stripe; - unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1; - int ret; + unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1; if (offset == ios->offset) /* Go to start collect $200 */ goto read_last_stripe; @@ -478,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios) min_p = _sp2d_min_pg(sp2d); max_p = _sp2d_max_pg(sp2d); + ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n", + offset, ios->offset, min_p, max_p); + for (c = 0; ; c++) { ore_calc_stripe_info(ios->layout, offset, 0, &read_si); read_si.obj_offset += min_p * PAGE_SIZE; @@ -512,6 +511,18 @@ static int _read_4_write(struct ore_io_state *ios) } read_last_stripe: + return 0; +} + +static int _read_4_write_last_stripe(struct ore_io_state *ios) +{ + struct ore_striping_info read_si; + struct __stripe_pages_2d *sp2d = ios->sp2d; + u64 offset; + u64 last_stripe_end; + unsigned bytes_in_stripe = ios->si.bytes_in_stripe; + unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1; + offset = ios->offset + ios->length; if (offset % PAGE_SIZE) _add_to_r4w_last_page(ios, &offset); @@ -527,15 +538,15 @@ read_last_stripe: c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, ios->layout->mirrors_p1, read_si.par_dev, read_si.dev); - BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end); - /* unaligned IO must be within a single stripe */ - if (min_p == sp2d->pages_in_unit) { /* Didn't do it yet */ min_p = _sp2d_min_pg(sp2d); max_p = _sp2d_max_pg(sp2d); } + ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n", + offset, last_stripe_end, min_p, max_p); + while (offset < last_stripe_end) { struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; @@ -568,6 +579,15 @@ read_last_stripe: } read_it: + return 0; +} + +static int _read_4_write_execute(struct ore_io_state *ios) +{ + struct ore_io_state *ios_read; + unsigned i; + int ret; + ios_read = ios->ios_read_4_write; if (!ios_read) return 0; @@ -591,6 +611,8 @@ read_it: } _mark_read4write_pages_uptodate(ios_read, ret); + ore_put_io_state(ios_read); + ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */ return 0; } @@ -626,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios, /* If first stripe, Read in all read4write pages * (if needed) before we calculate the first parity. */ - _read_4_write(ios); + _read_4_write_first_stripe(ios); } + if (!cur_len) /* If last stripe r4w pages of last stripe */ + _read_4_write_last_stripe(ios); + _read_4_write_execute(ios); for (i = 0; i < num_pages; i++) { pages[i] = _raid_page_alloc(); @@ -654,34 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios, int _ore_post_alloc_raid_stuff(struct ore_io_state *ios) { - struct ore_layout *layout = ios->layout; - if (ios->parity_pages) { + struct ore_layout *layout = ios->layout; unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE; - unsigned stripe_size = ios->si.bytes_in_stripe; - u64 last_stripe, first_stripe; if (_sp2d_alloc(pages_in_unit, layout->group_width, layout->parity, &ios->sp2d)) { return -ENOMEM; } - - /* Round io down to last full strip */ - first_stripe = div_u64(ios->offset, stripe_size); - last_stripe = div_u64(ios->offset + ios->length, stripe_size); - - /* If an IO spans more then a single stripe it must end at - * a stripe boundary. The reminder at the end is pushed into the - * next IO. - */ - if (last_stripe != first_stripe) { - ios->length = last_stripe * stripe_size - ios->offset; - - BUG_ON(!ios->length); - ios->nr_pages = (ios->length + PAGE_SIZE - 1) / - PAGE_SIZE; - ios->si.length = ios->length; /*make it consistent */ - } } return 0; } diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 8da837be0c82..df76291d692b 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -584,7 +584,8 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) if (bitmap_bh == NULL) continue; - x = ext4_count_free(bitmap_bh, sb->s_blocksize); + x = ext4_count_free(bitmap_bh->b_data, + EXT4_BLOCKS_PER_GROUP(sb) / 8); printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n", i, ext4_free_group_clusters(sb, gdp), x); bitmap_count += x; diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index fa3af81ac565..bbde5d582978 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -11,21 +11,15 @@ #include <linux/jbd2.h> #include "ext4.h" -#ifdef EXT4FS_DEBUG - static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; -unsigned int ext4_count_free(struct buffer_head *map, unsigned int numchars) +unsigned int ext4_count_free(char *bitmap, unsigned int numchars) { unsigned int i, sum = 0; - if (!map) - return 0; for (i = 0; i < numchars; i++) - sum += nibblemap[map->b_data[i] & 0xf] + - nibblemap[(map->b_data[i] >> 4) & 0xf]; + sum += nibblemap[bitmap[i] & 0xf] + + nibblemap[(bitmap[i] >> 4) & 0xf]; return sum; } -#endif /* EXT4FS_DEBUG */ - diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0e01e90add8b..47d1c8cda0cb 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1140,8 +1140,7 @@ struct ext4_sb_info { unsigned long s_desc_per_block; /* Number of group descriptors per block */ ext4_group_t s_groups_count; /* Number of groups in the fs */ ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */ - unsigned long s_overhead_last; /* Last calculated overhead */ - unsigned long s_blocks_last; /* Last seen block count */ + unsigned long s_overhead; /* # of fs overhead clusters */ unsigned int s_cluster_ratio; /* Number of blocks per cluster */ unsigned int s_cluster_bits; /* log2 of s_cluster_ratio */ loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ @@ -1783,7 +1782,7 @@ struct mmpd_data { # define NORET_AND noreturn, /* bitmap.c */ -extern unsigned int ext4_count_free(struct buffer_head *, unsigned); +extern unsigned int ext4_count_free(char *bitmap, unsigned numchars); /* balloc.c */ extern unsigned int ext4_block_group(struct super_block *sb, @@ -1950,6 +1949,7 @@ extern int ext4_group_extend(struct super_block *sb, extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); /* super.c */ +extern int ext4_calculate_overhead(struct super_block *sb); extern void *ext4_kvmalloc(size_t size, gfp_t flags); extern void *ext4_kvzalloc(size_t size, gfp_t flags); extern void ext4_kvfree(void *ptr); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index abcdeab67f52..6b7daa45f6bf 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2503,10 +2503,10 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, { struct super_block *sb = inode->i_sb; int depth = ext_depth(inode); - struct ext4_ext_path *path; + struct ext4_ext_path *path = NULL; ext4_fsblk_t partial_cluster = 0; handle_t *handle; - int i, err; + int i = 0, err; ext_debug("truncate since %u to %u\n", start, end); @@ -2539,8 +2539,12 @@ again: } depth = ext_depth(inode); ex = path[depth].p_ext; - if (!ex) + if (!ex) { + ext4_ext_drop_refs(path); + kfree(path); + path = NULL; goto cont; + } ee_block = le32_to_cpu(ex->ee_block); @@ -2570,8 +2574,6 @@ again: if (err < 0) goto out; } - ext4_ext_drop_refs(path); - kfree(path); } cont: @@ -2580,19 +2582,27 @@ cont: * after i_size and walking into the tree depth-wise. */ depth = ext_depth(inode); - path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS); - if (path == NULL) { - ext4_journal_stop(handle); - return -ENOMEM; - } - path[0].p_depth = depth; - path[0].p_hdr = ext_inode_hdr(inode); + if (path) { + int k = i = depth; + while (--k > 0) + path[k].p_block = + le16_to_cpu(path[k].p_hdr->eh_entries)+1; + } else { + path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), + GFP_NOFS); + if (path == NULL) { + ext4_journal_stop(handle); + return -ENOMEM; + } + path[0].p_depth = depth; + path[0].p_hdr = ext_inode_hdr(inode); - if (ext4_ext_check(inode, path[0].p_hdr, depth)) { - err = -EIO; - goto out; + if (ext4_ext_check(inode, path[0].p_hdr, depth)) { + err = -EIO; + goto out; + } } - i = err = 0; + err = 0; while (i >= 0 && err == 0) { if (i == depth) { @@ -2706,8 +2716,10 @@ cont: out: ext4_ext_drop_refs(path); kfree(path); - if (err == -EAGAIN) + if (err == -EAGAIN) { + path = NULL; goto again; + } ext4_journal_stop(handle); return err; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 8900f8b2ad48..0ee374d27fdb 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1013,7 +1013,8 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) if (!bitmap_bh) continue; - x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); + x = ext4_count_free(bitmap_bh->b_data, + EXT4_INODES_PER_GROUP(sb) / 8); printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", (unsigned long) i, ext4_free_inodes_count(sb, gdp), x); bitmap_count += x; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c77b0bd2c711..55a654d86182 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -279,6 +279,15 @@ void ext4_da_update_reserve_space(struct inode *inode, used = ei->i_reserved_data_blocks; } + if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) { + ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d " + "with only %d reserved metadata blocks\n", __func__, + inode->i_ino, ei->i_allocated_meta_blocks, + ei->i_reserved_meta_blocks); + WARN_ON(1); + ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks; + } + /* Update per-inode reservations */ ei->i_reserved_data_blocks -= used; ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; @@ -1104,6 +1113,17 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) struct ext4_inode_info *ei = EXT4_I(inode); unsigned int md_needed; int ret; + ext4_lblk_t save_last_lblock; + int save_len; + + /* + * We will charge metadata quota at writeout time; this saves + * us from metadata over-estimation, though we may go over by + * a small amount in the end. Here we just reserve for data. + */ + ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); + if (ret) + return ret; /* * recalculate the amount of metadata blocks to reserve @@ -1112,32 +1132,31 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) */ repeat: spin_lock(&ei->i_block_reservation_lock); + /* + * ext4_calc_metadata_amount() has side effects, which we have + * to be prepared undo if we fail to claim space. + */ + save_len = ei->i_da_metadata_calc_len; + save_last_lblock = ei->i_da_metadata_calc_last_lblock; md_needed = EXT4_NUM_B2C(sbi, ext4_calc_metadata_amount(inode, lblock)); trace_ext4_da_reserve_space(inode, md_needed); - spin_unlock(&ei->i_block_reservation_lock); /* - * We will charge metadata quota at writeout time; this saves - * us from metadata over-estimation, though we may go over by - * a small amount in the end. Here we just reserve for data. - */ - ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); - if (ret) - return ret; - /* * We do still charge estimated metadata to the sb though; * we cannot afford to run out of free blocks. */ if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) { - dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); + ei->i_da_metadata_calc_len = save_len; + ei->i_da_metadata_calc_last_lblock = save_last_lblock; + spin_unlock(&ei->i_block_reservation_lock); if (ext4_should_retry_alloc(inode->i_sb, &retries)) { yield(); goto repeat; } + dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); return -ENOSPC; } - spin_lock(&ei->i_block_reservation_lock); ei->i_reserved_data_blocks++; ei->i_reserved_meta_blocks += md_needed; spin_unlock(&ei->i_block_reservation_lock); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 1365903a5144..9727522e2716 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -261,7 +261,6 @@ group_extend_out: err = ext4_move_extents(filp, donor_filp, me.orig_start, me.donor_start, me.len, &me.moved_len); mnt_drop_write_file(filp); - mnt_drop_write(filp->f_path.mnt); if (copy_to_user((struct move_extent __user *)arg, &me, sizeof(me))) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 53589ff8824b..3407a62590d6 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1141,7 +1141,7 @@ static void ext4_update_super(struct super_block *sb, struct ext4_new_group_data *group_data = flex_gd->groups; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; - int i; + int i, ret; BUG_ON(flex_gd->count == 0 || group_data == NULL); /* @@ -1216,6 +1216,11 @@ static void ext4_update_super(struct super_block *sb, &sbi->s_flex_groups[flex_group].free_inodes); } + /* + * Update the fs overhead information + */ + ext4_calculate_overhead(sb); + if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: added group %u:" "%llu blocks(%llu free %llu reserved)\n", flex_gd->count, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7449e77b9728..65f3e0d203ae 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2944,6 +2944,114 @@ static void ext4_destroy_lazyinit_thread(void) kthread_stop(ext4_lazyinit_task); } +/* + * Note: calculating the overhead so we can be compatible with + * historical BSD practice is quite difficult in the face of + * clusters/bigalloc. This is because multiple metadata blocks from + * different block group can end up in the same allocation cluster. + * Calculating the exact overhead in the face of clustered allocation + * requires either O(all block bitmaps) in memory or O(number of block + * groups**2) in time. We will still calculate the superblock for + * older file systems --- and if we come across with a bigalloc file + * system with zero in s_overhead_clusters the estimate will be close to + * correct especially for very large cluster sizes --- but for newer + * file systems, it's better to calculate this figure once at mkfs + * time, and store it in the superblock. If the superblock value is + * present (even for non-bigalloc file systems), we will use it. + */ +static int count_overhead(struct super_block *sb, ext4_group_t grp, + char *buf) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_group_desc *gdp; + ext4_fsblk_t first_block, last_block, b; + ext4_group_t i, ngroups = ext4_get_groups_count(sb); + int s, j, count = 0; + + first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + + (grp * EXT4_BLOCKS_PER_GROUP(sb)); + last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1; + for (i = 0; i < ngroups; i++) { + gdp = ext4_get_group_desc(sb, i, NULL); + b = ext4_block_bitmap(sb, gdp); + if (b >= first_block && b <= last_block) { + ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf); + count++; + } + b = ext4_inode_bitmap(sb, gdp); + if (b >= first_block && b <= last_block) { + ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf); + count++; + } + b = ext4_inode_table(sb, gdp); + if (b >= first_block && b + sbi->s_itb_per_group <= last_block) + for (j = 0; j < sbi->s_itb_per_group; j++, b++) { + int c = EXT4_B2C(sbi, b - first_block); + ext4_set_bit(c, buf); + count++; + } + if (i != grp) + continue; + s = 0; + if (ext4_bg_has_super(sb, grp)) { + ext4_set_bit(s++, buf); + count++; + } + for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) { + ext4_set_bit(EXT4_B2C(sbi, s++), buf); + count++; + } + } + if (!count) + return 0; + return EXT4_CLUSTERS_PER_GROUP(sb) - + ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8); +} + +/* + * Compute the overhead and stash it in sbi->s_overhead + */ +int ext4_calculate_overhead(struct super_block *sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + ext4_group_t i, ngroups = ext4_get_groups_count(sb); + ext4_fsblk_t overhead = 0; + char *buf = (char *) get_zeroed_page(GFP_KERNEL); + + memset(buf, 0, PAGE_SIZE); + if (!buf) + return -ENOMEM; + + /* + * Compute the overhead (FS structures). This is constant + * for a given filesystem unless the number of block groups + * changes so we cache the previous value until it does. + */ + + /* + * All of the blocks before first_data_block are overhead + */ + overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block)); + + /* + * Add the overhead found in each block group + */ + for (i = 0; i < ngroups; i++) { + int blks; + + blks = count_overhead(sb, i, buf); + overhead += blks; + if (blks) + memset(buf, 0, PAGE_SIZE); + cond_resched(); + } + sbi->s_overhead = overhead; + smp_wmb(); + free_page((unsigned long) buf); + return 0; +} + static int ext4_fill_super(struct super_block *sb, void *data, int silent) { char *orig_data = kstrdup(data, GFP_KERNEL); @@ -3559,6 +3667,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) no_journal: /* + * Get the # of file system overhead blocks from the + * superblock if present. + */ + if (es->s_overhead_clusters) + sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters); + else { + ret = ext4_calculate_overhead(sb); + if (ret) + goto failed_mount_wq; + } + + /* * The maximum number of concurrent works can be high and * concurrency isn't really necessary. Limit it to 1. */ @@ -4421,67 +4541,21 @@ restore_opts: return err; } -/* - * Note: calculating the overhead so we can be compatible with - * historical BSD practice is quite difficult in the face of - * clusters/bigalloc. This is because multiple metadata blocks from - * different block group can end up in the same allocation cluster. - * Calculating the exact overhead in the face of clustered allocation - * requires either O(all block bitmaps) in memory or O(number of block - * groups**2) in time. We will still calculate the superblock for - * older file systems --- and if we come across with a bigalloc file - * system with zero in s_overhead_clusters the estimate will be close to - * correct especially for very large cluster sizes --- but for newer - * file systems, it's better to calculate this figure once at mkfs - * time, and store it in the superblock. If the superblock value is - * present (even for non-bigalloc file systems), we will use it. - */ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; - struct ext4_group_desc *gdp; + ext4_fsblk_t overhead = 0; u64 fsid; s64 bfree; - if (test_opt(sb, MINIX_DF)) { - sbi->s_overhead_last = 0; - } else if (es->s_overhead_clusters) { - sbi->s_overhead_last = le32_to_cpu(es->s_overhead_clusters); - } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { - ext4_group_t i, ngroups = ext4_get_groups_count(sb); - ext4_fsblk_t overhead = 0; - - /* - * Compute the overhead (FS structures). This is constant - * for a given filesystem unless the number of block groups - * changes so we cache the previous value until it does. - */ - - /* - * All of the blocks before first_data_block are - * overhead - */ - overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block)); - - /* - * Add the overhead found in each block group - */ - for (i = 0; i < ngroups; i++) { - gdp = ext4_get_group_desc(sb, i, NULL); - overhead += ext4_num_overhead_clusters(sb, i, gdp); - cond_resched(); - } - sbi->s_overhead_last = overhead; - smp_wmb(); - sbi->s_blocks_last = ext4_blocks_count(es); - } + if (!test_opt(sb, MINIX_DF)) + overhead = sbi->s_overhead; buf->f_type = EXT4_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; - buf->f_blocks = (ext4_blocks_count(es) - - EXT4_C2B(sbi, sbi->s_overhead_last)); + buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, sbi->s_overhead); bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) - percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter); /* prevent underflow in case that few free space is available */ diff --git a/fs/locks.c b/fs/locks.c index 6a64f154db04..fcc50ab71cc6 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -308,7 +308,7 @@ static int flock_make_lock(struct file *filp, struct file_lock **lock, return 0; } -static int assign_type(struct file_lock *fl, int type) +static int assign_type(struct file_lock *fl, long type) { switch (type) { case F_RDLCK: @@ -445,7 +445,7 @@ static const struct lock_manager_operations lease_manager_ops = { /* * Initialize a lease, use the default lock manager operations */ -static int lease_init(struct file *filp, int type, struct file_lock *fl) +static int lease_init(struct file *filp, long type, struct file_lock *fl) { if (assign_type(fl, type) != 0) return -EINVAL; @@ -463,7 +463,7 @@ static int lease_init(struct file *filp, int type, struct file_lock *fl) } /* Allocate a file_lock initialised to this type of lease */ -static struct file_lock *lease_alloc(struct file *filp, int type) +static struct file_lock *lease_alloc(struct file *filp, long type) { struct file_lock *fl = locks_alloc_lock(); int error = -ENOMEM; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index aa9b709fd328..f0f439dfeaa3 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -451,8 +451,11 @@ static int nfs_release_page(struct page *page, gfp_t gfp) dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); - /* Only do I/O if gfp is a superset of GFP_KERNEL */ - if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL) { + /* Only do I/O if gfp is a superset of GFP_KERNEL, and we're not + * doing this memory reclaim for a fs-related allocation. + */ + if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL && + !(current->flags & PF_FSTRANS)) { int how = FLUSH_SYNC; /* Don't let kswapd deadlock waiting for OOM RPC calls */ diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 93aa3a4c4b0f..929ba0111724 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -205,12 +205,18 @@ static int nfs_idmap_init_keyring(void) if (ret < 0) goto failed_put_key; + ret = register_key_type(&key_type_id_resolver_legacy); + if (ret < 0) + goto failed_reg_legacy; + set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; id_resolver_cache = cred; return 0; +failed_reg_legacy: + unregister_key_type(&key_type_id_resolver); failed_put_key: key_put(keyring); failed_put_cred: @@ -222,6 +228,7 @@ static void nfs_idmap_quit_keyring(void) { key_revoke(id_resolver_cache->thread_keyring); unregister_key_type(&key_type_id_resolver); + unregister_key_type(&key_type_id_resolver_legacy); put_cred(id_resolver_cache); } @@ -385,7 +392,7 @@ static const struct rpc_pipe_ops idmap_upcall_ops = { }; static struct key_type key_type_id_resolver_legacy = { - .name = "id_resolver", + .name = "id_legacy", .instantiate = user_instantiate, .match = user_match, .revoke = user_revoke, @@ -658,6 +665,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, if (ret < 0) goto out2; + BUG_ON(idmap->idmap_key_cons != NULL); idmap->idmap_key_cons = cons; ret = rpc_queue_upcall(idmap->idmap_pipe, msg); @@ -671,8 +679,7 @@ out2: out1: kfree(msg); out0: - key_revoke(cons->key); - key_revoke(cons->authkey); + complete_request_key(cons, ret); return ret; } @@ -706,11 +713,18 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); struct idmap *idmap = (struct idmap *)rpci->private; - struct key_construction *cons = idmap->idmap_key_cons; + struct key_construction *cons; struct idmap_msg im; size_t namelen_in; int ret; + /* If instantiation is successful, anyone waiting for key construction + * will have been woken up and someone else may now have used + * idmap_key_cons - so after this point we may no longer touch it. + */ + cons = ACCESS_ONCE(idmap->idmap_key_cons); + idmap->idmap_key_cons = NULL; + if (mlen != sizeof(im)) { ret = -ENOSPC; goto out; @@ -723,7 +737,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (!(im.im_status & IDMAP_STATUS_SUCCESS)) { ret = mlen; - complete_request_key(idmap->idmap_key_cons, -ENOKEY); + complete_request_key(cons, -ENOKEY); goto out_incomplete; } @@ -740,7 +754,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) } out: - complete_request_key(idmap->idmap_key_cons, ret); + complete_request_key(cons, ret); out_incomplete: return ret; } diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 4bff4a3dab46..1afe74c42c8a 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -453,7 +453,10 @@ int objio_read_pagelist(struct nfs_read_data *rdata) objios->ios->done = _read_done; dprintk("%s: offset=0x%llx length=0x%x\n", __func__, rdata->args.offset, rdata->args.count); - return ore_read(objios->ios); + ret = ore_read(objios->ios); + if (unlikely(ret)) + objio_free_result(&objios->oir); + return ret; } /* @@ -484,8 +487,16 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) struct objio_state *objios = priv; struct nfs_write_data *wdata = objios->oir.rpcdata; pgoff_t index = offset / PAGE_SIZE; - struct page *page = find_get_page(wdata->inode->i_mapping, index); + struct page *page; + loff_t i_size = i_size_read(wdata->inode); + + if (offset >= i_size) { + *uptodate = true; + dprintk("%s: g_zero_page index=0x%lx\n", __func__, index); + return ZERO_PAGE(0); + } + page = find_get_page(wdata->inode->i_mapping, index); if (!page) { page = find_or_create_page(wdata->inode->i_mapping, index, GFP_NOFS); @@ -506,8 +517,10 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) static void __r4w_put_page(void *priv, struct page *page) { - dprintk("%s: index=0x%lx\n", __func__, page->index); - page_cache_release(page); + dprintk("%s: index=0x%lx\n", __func__, + (page == ZERO_PAGE(0)) ? -1UL : page->index); + if (ZERO_PAGE(0) != page) + page_cache_release(page); return; } @@ -537,8 +550,10 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how) dprintk("%s: offset=0x%llx length=0x%x\n", __func__, wdata->args.offset, wdata->args.count); ret = ore_write(objios->ios); - if (unlikely(ret)) + if (unlikely(ret)) { + objio_free_result(&objios->oir); return ret; + } if (objios->sync) _write_done(objios->ios, objios); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 74c00bc92b9a..283d15e9f46d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2233,7 +2233,7 @@ out_acl: if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) { if ((buflen -= 4) < 0) goto out_resource; - WRITE32(1); + WRITE32(0); } if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) { if ((buflen -= 4) < 0) diff --git a/fs/select.c b/fs/select.c index 17d33d09fc16..0baa0a351a1c 100644 --- a/fs/select.c +++ b/fs/select.c @@ -345,8 +345,8 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds) struct fdtable *fdt; /* handle last in-complete long-word first */ - set = ~(~0UL << (n & (__NFDBITS-1))); - n /= __NFDBITS; + set = ~(~0UL << (n & (BITS_PER_LONG-1))); + n /= BITS_PER_LONG; fdt = files_fdtable(current->files); open_fds = fdt->open_fds + n; max = 0; @@ -373,7 +373,7 @@ get_max: max++; set >>= 1; } while (set); - max += n * __NFDBITS; + max += n * BITS_PER_LONG; } return max; @@ -435,11 +435,11 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) in = *inp++; out = *outp++; ex = *exp++; all_bits = in | out | ex; if (all_bits == 0) { - i += __NFDBITS; + i += BITS_PER_LONG; continue; } - for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) { + for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) { int fput_needed; if (i >= n) break; diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 771f7fb6ce92..a7be8e2b4b9d 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -724,8 +724,12 @@ static int fixup_free_space(struct ubifs_info *c) lnum = ubifs_next_log_lnum(c, lnum); } - /* Fixup the current log head */ - err = fixup_leb(c, c->lhead_lnum, c->lhead_offs); + /* + * Fixup the log head which contains the only a CS node at the + * beginning. + */ + err = fixup_leb(c, c->lhead_lnum, + ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size)); if (err) goto out; diff --git a/fs/udf/super.c b/fs/udf/super.c index 8d86a8706c0e..e660ffdd91b6 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1283,7 +1283,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, BUG_ON(ident != TAG_IDENT_LVD); lvd = (struct logicalVolDesc *)bh->b_data; table_len = le32_to_cpu(lvd->mapTableLength); - if (sizeof(*lvd) + table_len > sb->s_blocksize) { + if (table_len > sb->s_blocksize - sizeof(*lvd)) { udf_err(sb, "error loading logical volume descriptor: " "Partition table too long (%u > %lu)\n", table_len, sb->s_blocksize - sizeof(*lvd)); |