diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/cifs/cifsfs.c | 8 | ||||
-rw-r--r-- | fs/cifs/cifsglob.h | 10 | ||||
-rw-r--r-- | fs/cifs/cifssmb.c | 9 | ||||
-rw-r--r-- | fs/cifs/connect.c | 14 | ||||
-rw-r--r-- | fs/cifs/dir.c | 6 | ||||
-rw-r--r-- | fs/cifs/file.c | 23 | ||||
-rw-r--r-- | fs/cifs/transport.c | 4 | ||||
-rw-r--r-- | fs/dcache.c | 3 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 2 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.h | 56 | ||||
-rw-r--r-- | fs/ext4/extents.c | 2 | ||||
-rw-r--r-- | fs/ext4/fsync.c | 2 | ||||
-rw-r--r-- | fs/ext4/inode.c | 46 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 11 | ||||
-rw-r--r-- | fs/hugetlbfs/inode.c | 25 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 2 | ||||
-rw-r--r-- | fs/lockd/svc.c | 2 | ||||
-rw-r--r-- | fs/nfs/callback_xdr.c | 4 | ||||
-rw-r--r-- | fs/nfs/delegation.c | 11 | ||||
-rw-r--r-- | fs/nfs/delegation.h | 1 | ||||
-rw-r--r-- | fs/nfs/nfs4_fs.h | 3 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 64 | ||||
-rw-r--r-- | fs/nfs/nfs4state.c | 33 | ||||
-rw-r--r-- | fs/proc/namespaces.c | 2 | ||||
-rw-r--r-- | fs/proc/proc_sysctl.c | 17 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 9 | ||||
-rw-r--r-- | fs/sysfs/inode.c | 11 | ||||
-rw-r--r-- | fs/udf/file.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_iget.c | 18 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 33 |
30 files changed, 271 insertions, 162 deletions
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8f1fe324162b..b4c2c998acdd 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -76,7 +76,7 @@ MODULE_PARM_DESC(cifs_min_small, "Small network buffers in pool. Default: 30 " unsigned int cifs_max_pending = CIFS_MAX_REQ; module_param(cifs_max_pending, int, 0444); MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. " - "Default: 50 Range: 2 to 256"); + "Default: 32767 Range: 2 to 32767."); unsigned short echo_retries = 5; module_param(echo_retries, ushort, 0644); MODULE_PARM_DESC(echo_retries, "Number of echo attempts before giving up and " @@ -1116,9 +1116,9 @@ init_cifs(void) if (cifs_max_pending < 2) { cifs_max_pending = 2; cFYI(1, "cifs_max_pending set to min of 2"); - } else if (cifs_max_pending > 256) { - cifs_max_pending = 256; - cFYI(1, "cifs_max_pending set to max of 256"); + } else if (cifs_max_pending > CIFS_MAX_REQ) { + cifs_max_pending = CIFS_MAX_REQ; + cFYI(1, "cifs_max_pending set to max of %u", CIFS_MAX_REQ); } rc = cifs_fscache_register(); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 8238aa13e01c..c467ac89a821 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -55,14 +55,9 @@ /* * MAX_REQ is the maximum number of requests that WE will send - * on one socket concurrently. It also matches the most common - * value of max multiplex returned by servers. We may - * eventually want to use the negotiated value (in case - * future servers can handle more) when we are more confident that - * we will not have problems oveloading the socket with pending - * write data. + * on one socket concurrently. */ -#define CIFS_MAX_REQ 50 +#define CIFS_MAX_REQ 32767 #define RFC1001_NAME_LEN 15 #define RFC1001_NAME_LEN_WITH_NULL (RFC1001_NAME_LEN + 1) @@ -263,6 +258,7 @@ struct TCP_Server_Info { bool session_estab; /* mark when very first sess is established */ u16 dialect; /* dialect index that server chose */ enum securityEnum secType; + bool oplocks:1; /* enable oplocks */ unsigned int maxReq; /* Clients should submit no more */ /* than maxReq distinct unanswered SMBs to the server when using */ /* multiplexed reads or writes */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 6600aa2d2ef3..0e6adac01f11 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -458,7 +458,10 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses) goto neg_err_exit; } server->sec_mode = (__u8)le16_to_cpu(rsp->SecurityMode); - server->maxReq = le16_to_cpu(rsp->MaxMpxCount); + server->maxReq = min_t(unsigned int, + le16_to_cpu(rsp->MaxMpxCount), + cifs_max_pending); + server->oplocks = server->maxReq > 1 ? enable_oplocks : false; server->maxBuf = le16_to_cpu(rsp->MaxBufSize); server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); /* even though we do not use raw we might as well set this @@ -564,7 +567,9 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses) /* one byte, so no need to convert this or EncryptionKeyLen from little endian */ - server->maxReq = le16_to_cpu(pSMBr->MaxMpxCount); + server->maxReq = min_t(unsigned int, le16_to_cpu(pSMBr->MaxMpxCount), + cifs_max_pending); + server->oplocks = server->maxReq > 1 ? enable_oplocks : false; /* probably no need to store and check maxvcs */ server->maxBuf = le32_to_cpu(pSMBr->MaxBufferSize); server->max_rw = le32_to_cpu(pSMBr->MaxRawSize); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 720edf52dbf8..9e0675a74cf2 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -625,14 +625,10 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) spin_unlock(&GlobalMid_Lock); wake_up_all(&server->response_q); - /* - * Check if we have blocked requests that need to free. Note that - * cifs_max_pending is normally 50, but can be set at module install - * time to as little as two. - */ + /* Check if we have blocked requests that need to free. */ spin_lock(&GlobalMid_Lock); - if (atomic_read(&server->inFlight) >= cifs_max_pending) - atomic_set(&server->inFlight, cifs_max_pending - 1); + if (atomic_read(&server->inFlight) >= server->maxReq) + atomic_set(&server->inFlight, server->maxReq - 1); /* * We do not want to set the max_pending too low or we could end up * with the counter going negative. @@ -1890,6 +1886,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) tcp_ses->noautotune = volume_info->noautotune; tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay; atomic_set(&tcp_ses->inFlight, 0); + tcp_ses->maxReq = 1; /* enough to send negotiate request */ init_waitqueue_head(&tcp_ses->response_q); init_waitqueue_head(&tcp_ses->request_q); INIT_LIST_HEAD(&tcp_ses->pending_mid_q); @@ -3220,7 +3217,7 @@ cifs_ra_pages(struct cifs_sb_info *cifs_sb) int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) { - int rc = 0; + int rc; int xid; struct cifs_ses *pSesInfo; struct cifs_tcon *tcon; @@ -3247,6 +3244,7 @@ try_mount_again: FreeXid(xid); } #endif + rc = 0; tcon = NULL; pSesInfo = NULL; srvTcp = NULL; diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index bf68b4fc9512..6937e7c19ee3 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -171,7 +171,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, } tcon = tlink_tcon(tlink); - if (enable_oplocks) + if (tcon->ses->server->oplocks) oplock = REQ_OPLOCK; if (nd) @@ -492,7 +492,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, { int xid; int rc = 0; /* to get around spurious gcc warning, set to zero here */ - __u32 oplock = enable_oplocks ? REQ_OPLOCK : 0; + __u32 oplock; __u16 fileHandle = 0; bool posix_open = false; struct cifs_sb_info *cifs_sb; @@ -518,6 +518,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, } pTcon = tlink_tcon(tlink); + oplock = pTcon->ses->server->oplocks ? REQ_OPLOCK : 0; + /* * Don't allow the separator character in a path component. * The VFS will not allow "/", but "\" is allowed by posix. diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5e64748a2917..159fcc56dc2d 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -380,7 +380,7 @@ int cifs_open(struct inode *inode, struct file *file) cFYI(1, "inode = 0x%p file flags are 0x%x for %s", inode, file->f_flags, full_path); - if (enable_oplocks) + if (tcon->ses->server->oplocks) oplock = REQ_OPLOCK; else oplock = 0; @@ -505,7 +505,7 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, pCifsFile->f_flags, full_path); - if (enable_oplocks) + if (tcon->ses->server->oplocks) oplock = REQ_OPLOCK; else oplock = 0; @@ -960,9 +960,9 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) INIT_LIST_HEAD(&locks_to_send); /* - * Allocating count locks is enough because no locks can be added to - * the list while we are holding cinode->lock_mutex that protects - * locking operations of this inode. + * Allocating count locks is enough because no FL_POSIX locks can be + * added to the list while we are holding cinode->lock_mutex that + * protects locking operations of this inode. */ for (; i < count; i++) { lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); @@ -973,18 +973,20 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) list_add_tail(&lck->llist, &locks_to_send); } - i = 0; el = locks_to_send.next; lock_flocks(); cifs_for_each_lock(cfile->dentry->d_inode, before) { + flock = *before; + if ((flock->fl_flags & FL_POSIX) == 0) + continue; if (el == &locks_to_send) { - /* something is really wrong */ + /* + * The list ended. We don't have enough allocated + * structures - something is really wrong. + */ cERROR(1, "Can't push all brlocks!"); break; } - flock = *before; - if ((flock->fl_flags & FL_POSIX) == 0) - continue; length = 1 + flock->fl_end - flock->fl_start; if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK) type = CIFS_RDLCK; @@ -996,7 +998,6 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) lck->length = length; lck->type = type; lck->offset = flock->fl_start; - i++; el = el->next; } unlock_flocks(); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 0cc9584f5889..99a27cfa6cd2 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -265,12 +265,12 @@ static int wait_for_free_request(struct TCP_Server_Info *server, spin_lock(&GlobalMid_Lock); while (1) { - if (atomic_read(&server->inFlight) >= cifs_max_pending) { + if (atomic_read(&server->inFlight) >= server->maxReq) { spin_unlock(&GlobalMid_Lock); cifs_num_waiters_inc(server); wait_event(server->request_q, atomic_read(&server->inFlight) - < cifs_max_pending); + < server->maxReq); cifs_num_waiters_dec(server); spin_lock(&GlobalMid_Lock); } else { diff --git a/fs/dcache.c b/fs/dcache.c index 83e8ecda6384..d47b2660eea1 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2358,6 +2358,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) if (d_ancestor(alias, dentry)) { /* Check for loops */ actual = ERR_PTR(-ELOOP); + spin_unlock(&inode->i_lock); } else if (IS_ROOT(alias)) { /* Is this an anonymous mountpoint that we * could splice into our tree? */ @@ -2367,7 +2368,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) goto found; } else { /* Nope, but we must(!) avoid directory - * aliasing */ + * aliasing. This drops inode->i_lock */ actual = __d_unalias(inode, dentry, alias); } write_sequnlock(&rename_lock); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 5b0e26a1272d..dbae4d98dab0 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -184,6 +184,8 @@ struct mpage_da_data { #define EXT4_IO_END_UNWRITTEN 0x0001 #define EXT4_IO_END_ERROR 0x0002 #define EXT4_IO_END_QUEUED 0x0004 +#define EXT4_IO_END_DIRECT 0x0008 +#define EXT4_IO_END_IN_FSYNC 0x0010 struct ext4_io_page { struct page *p_page; diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 5802fa1dab18..95af6f878501 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -261,43 +261,45 @@ static inline void ext4_update_inode_fsync_trans(handle_t *handle, /* super.c */ int ext4_force_commit(struct super_block *sb); -static inline int ext4_should_journal_data(struct inode *inode) +/* + * Ext4 inode journal modes + */ +#define EXT4_INODE_JOURNAL_DATA_MODE 0x01 /* journal data mode */ +#define EXT4_INODE_ORDERED_DATA_MODE 0x02 /* ordered data mode */ +#define EXT4_INODE_WRITEBACK_DATA_MODE 0x04 /* writeback data mode */ + +static inline int ext4_inode_journal_mode(struct inode *inode) { if (EXT4_JOURNAL(inode) == NULL) - return 0; - if (!S_ISREG(inode->i_mode)) - return 1; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) - return 1; - if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) - return 1; - return 0; + return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ + /* We do not support data journalling with delayed allocation */ + if (!S_ISREG(inode->i_mode) || + test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) + return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ + if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) && + !test_opt(inode->i_sb, DELALLOC)) + return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) + return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) + return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ + else + BUG(); +} + +static inline int ext4_should_journal_data(struct inode *inode) +{ + return ext4_inode_journal_mode(inode) & EXT4_INODE_JOURNAL_DATA_MODE; } static inline int ext4_should_order_data(struct inode *inode) { - if (EXT4_JOURNAL(inode) == NULL) - return 0; - if (!S_ISREG(inode->i_mode)) - return 0; - if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) - return 0; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) - return 1; - return 0; + return ext4_inode_journal_mode(inode) & EXT4_INODE_ORDERED_DATA_MODE; } static inline int ext4_should_writeback_data(struct inode *inode) { - if (EXT4_JOURNAL(inode) == NULL) - return 1; - if (!S_ISREG(inode->i_mode)) - return 0; - if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) - return 0; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) - return 1; - return 0; + return ext4_inode_journal_mode(inode) & EXT4_INODE_WRITEBACK_DATA_MODE; } /* diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 607b1557d292..75070365f479 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -301,6 +301,8 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) ext4_fsblk_t block = ext4_ext_pblock(ext); int len = ext4_ext_get_actual_len(ext); + if (len == 0) + return 0; return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); } diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 00a2cb753efd..bb6c7d811313 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -89,6 +89,7 @@ int ext4_flush_completed_IO(struct inode *inode) io = list_entry(ei->i_completed_io_list.next, ext4_io_end_t, list); list_del_init(&io->list); + io->flag |= EXT4_IO_END_IN_FSYNC; /* * Calling ext4_end_io_nolock() to convert completed * IO to written. @@ -108,6 +109,7 @@ int ext4_flush_completed_IO(struct inode *inode) if (ret < 0) ret2 = ret; spin_lock_irqsave(&ei->i_completed_io_lock, flags); + io->flag &= ~EXT4_IO_END_IN_FSYNC; } spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); return (ret2 < 0) ? ret2 : 0; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 92655fd89657..3ce761373241 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2480,13 +2480,14 @@ static int ext4_da_write_end(struct file *file, int write_mode = (int)(unsigned long)fsdata; if (write_mode == FALL_BACK_TO_NONDELALLOC) { - if (ext4_should_order_data(inode)) { + switch (ext4_inode_journal_mode(inode)) { + case EXT4_INODE_ORDERED_DATA_MODE: return ext4_ordered_write_end(file, mapping, pos, len, copied, page, fsdata); - } else if (ext4_should_writeback_data(inode)) { + case EXT4_INODE_WRITEBACK_DATA_MODE: return ext4_writeback_write_end(file, mapping, pos, len, copied, page, fsdata); - } else { + default: BUG(); } } @@ -2793,9 +2794,6 @@ out: /* queue the work to convert unwritten extents to written */ queue_work(wq, &io_end->work); - - /* XXX: probably should move into the real I/O completion handler */ - inode_dio_done(inode); } static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) @@ -2919,9 +2917,12 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, iocb->private = NULL; EXT4_I(inode)->cur_aio_dio = NULL; if (!is_sync_kiocb(iocb)) { - iocb->private = ext4_init_io_end(inode, GFP_NOFS); - if (!iocb->private) + ext4_io_end_t *io_end = + ext4_init_io_end(inode, GFP_NOFS); + if (!io_end) return -ENOMEM; + io_end->flag |= EXT4_IO_END_DIRECT; + iocb->private = io_end; /* * we save the io structure for current async * direct IO, so that later ext4_map_blocks() @@ -3084,18 +3085,25 @@ static const struct address_space_operations ext4_da_aops = { void ext4_set_aops(struct inode *inode) { - if (ext4_should_order_data(inode) && - test_opt(inode->i_sb, DELALLOC)) - inode->i_mapping->a_ops = &ext4_da_aops; - else if (ext4_should_order_data(inode)) - inode->i_mapping->a_ops = &ext4_ordered_aops; - else if (ext4_should_writeback_data(inode) && - test_opt(inode->i_sb, DELALLOC)) - inode->i_mapping->a_ops = &ext4_da_aops; - else if (ext4_should_writeback_data(inode)) - inode->i_mapping->a_ops = &ext4_writeback_aops; - else + switch (ext4_inode_journal_mode(inode)) { + case EXT4_INODE_ORDERED_DATA_MODE: + if (test_opt(inode->i_sb, DELALLOC)) + inode->i_mapping->a_ops = &ext4_da_aops; + else + inode->i_mapping->a_ops = &ext4_ordered_aops; + break; + case EXT4_INODE_WRITEBACK_DATA_MODE: + if (test_opt(inode->i_sb, DELALLOC)) + inode->i_mapping->a_ops = &ext4_da_aops; + else + inode->i_mapping->a_ops = &ext4_writeback_aops; + break; + case EXT4_INODE_JOURNAL_DATA_MODE: inode->i_mapping->a_ops = &ext4_journalled_aops; + break; + default: + BUG(); + } } diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 7e106c810c62..24feb1ced3fd 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -111,6 +111,8 @@ int ext4_end_io_nolock(ext4_io_end_t *io) if (io->iocb) aio_complete(io->iocb, io->result, 0); + if (io->flag & EXT4_IO_END_DIRECT) + inode_dio_done(inode); /* Wake up anyone waiting on unwritten extent conversion */ if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten)) wake_up_all(ext4_ioend_wq(io->inode)); @@ -128,12 +130,18 @@ static void ext4_end_io_work(struct work_struct *work) unsigned long flags; spin_lock_irqsave(&ei->i_completed_io_lock, flags); + if (io->flag & EXT4_IO_END_IN_FSYNC) + goto requeue; if (list_empty(&io->list)) { spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); goto free; } if (!mutex_trylock(&inode->i_mutex)) { + bool was_queued; +requeue: + was_queued = !!(io->flag & EXT4_IO_END_QUEUED); + io->flag |= EXT4_IO_END_QUEUED; spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); /* * Requeue the work instead of waiting so that the work @@ -146,9 +154,8 @@ static void ext4_end_io_work(struct work_struct *work) * yield the cpu if it sees an end_io request that has already * been requeued. */ - if (io->flag & EXT4_IO_END_QUEUED) + if (was_queued) yield(); - io->flag |= EXT4_IO_END_QUEUED; return; } list_del_init(&io->list); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 0be5a78598d0..2d0ca246a508 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -238,17 +238,10 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, loff_t isize; ssize_t retval = 0; - mutex_lock(&inode->i_mutex); - /* validate length */ if (len == 0) goto out; - isize = i_size_read(inode); - if (!isize) - goto out; - - end_index = (isize - 1) >> huge_page_shift(h); for (;;) { struct page *page; unsigned long nr, ret; @@ -256,18 +249,21 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, /* nr is the maximum number of bytes to copy from this page */ nr = huge_page_size(h); + isize = i_size_read(inode); + if (!isize) + goto out; + end_index = (isize - 1) >> huge_page_shift(h); if (index >= end_index) { if (index > end_index) goto out; nr = ((isize - 1) & ~huge_page_mask(h)) + 1; - if (nr <= offset) { + if (nr <= offset) goto out; - } } nr = nr - offset; /* Find the page */ - page = find_get_page(mapping, index); + page = find_lock_page(mapping, index); if (unlikely(page == NULL)) { /* * We have a HOLE, zero out the user-buffer for the @@ -279,17 +275,18 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, else ra = 0; } else { + unlock_page(page); + /* * We have the page, copy it to user space buffer. */ ra = hugetlbfs_read_actor(page, offset, buf, len, nr); ret = ra; + page_cache_release(page); } if (ra < 0) { if (retval == 0) retval = ra; - if (page) - page_cache_release(page); goto out; } @@ -299,16 +296,12 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, index += offset >> huge_page_shift(h); offset &= ~huge_page_mask(h); - if (page) - page_cache_release(page); - /* short read or no more work */ if ((ret != nr) || (len == 0)) break; } out: *ppos = ((loff_t)index << huge_page_shift(h)) + offset; - mutex_unlock(&inode->i_mutex); return retval; } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index a0e41a4c080e..8267de588982 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1948,6 +1948,8 @@ zap_buffer_unlocked: clear_buffer_mapped(bh); clear_buffer_req(bh); clear_buffer_new(bh); + clear_buffer_delay(bh); + clear_buffer_unwritten(bh); bh->b_bdev = NULL; return may_free; } diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index c061b9aa7ddb..2444780f5cfa 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -440,7 +440,7 @@ static int param_set_##name(const char *val, struct kernel_param *kp) \ __typeof__(type) num = which_strtol(val, &endp, 0); \ if (endp == val || *endp || num < (min) || num > (max)) \ return -EINVAL; \ - *((int *) kp->arg) = num; \ + *((type *) kp->arg) = num; \ return 0; \ } diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 726e59a9e50f..168cb932db22 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -9,6 +9,8 @@ #include <linux/sunrpc/svc.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> +#include <linux/ratelimit.h> +#include <linux/printk.h> #include <linux/slab.h> #include <linux/sunrpc/bc_xprt.h> #include "nfs4_fs.h" @@ -167,7 +169,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound if (hdr->minorversion <= 1) { hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */ } else { - printk(KERN_WARNING "%s: NFSv4 server callback with " + pr_warn_ratelimited("NFS: %s: NFSv4 server callback with " "illegal minor version %u!\n", __func__, hdr->minorversion); return htonl(NFS4ERR_MINOR_VERS_MISMATCH); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 7f2654069806..ac889af8ccf5 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -466,6 +466,17 @@ static void nfs_delegation_run_state_manager(struct nfs_client *clp) nfs4_schedule_state_manager(clp); } +void nfs_remove_bad_delegation(struct inode *inode) +{ + struct nfs_delegation *delegation; + + delegation = nfs_detach_delegation(NFS_I(inode), NFS_SERVER(inode)); + if (delegation) { + nfs_inode_find_state_and_recover(inode, &delegation->stateid); + nfs_free_delegation(delegation); + } +} + /** * nfs_expire_all_delegation_types * @clp: client to process diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index d9322e490c56..691a79609184 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -45,6 +45,7 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp); void nfs_handle_cb_pathdown(struct nfs_client *clp); int nfs_client_return_marked_delegations(struct nfs_client *clp); int nfs_delegations_present(struct nfs_client *clp); +void nfs_remove_bad_delegation(struct inode *inode); void nfs_delegation_mark_reclaim(struct nfs_client *clp); void nfs_delegation_reap_unclaimed(struct nfs_client *clp); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 693ae22f8731..0983b2581ea3 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -191,6 +191,7 @@ struct nfs4_exception { long timeout; int retry; struct nfs4_state *state; + struct inode *inode; }; struct nfs4_state_recovery_ops { @@ -324,6 +325,8 @@ extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct nfs4_state *, fmode_t); extern void nfs4_close_sync(struct nfs4_state *, fmode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); +extern void nfs_inode_find_state_and_recover(struct inode *inode, + const nfs4_stateid *stateid); extern void nfs4_schedule_lease_recovery(struct nfs_client *); extern void nfs4_schedule_state_manager(struct nfs_client *); extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e52703080c6d..d9457002ca04 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -257,15 +257,28 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc { struct nfs_client *clp = server->nfs_client; struct nfs4_state *state = exception->state; + struct inode *inode = exception->inode; int ret = errorcode; exception->retry = 0; switch(errorcode) { case 0: return 0; + case -NFS4ERR_OPENMODE: + if (nfs_have_delegation(inode, FMODE_READ)) { + nfs_inode_return_delegation(inode); + exception->retry = 1; + return 0; + } + if (state == NULL) + break; + nfs4_schedule_stateid_recovery(server, state); + goto wait_on_recovery; + case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OPENMODE: + if (state != NULL) + nfs_remove_bad_delegation(state->inode); if (state == NULL) break; nfs4_schedule_stateid_recovery(server, state); @@ -1316,8 +1329,11 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state * The show must go on: exit, but mark the * stateid as needing recovery. */ + case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: + nfs_inode_find_state_and_recover(state->inode, + stateid); nfs4_schedule_stateid_recovery(server, state); case -EKEYEXPIRED: /* @@ -1822,7 +1838,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, * the user though... */ if (status == -NFS4ERR_BAD_SEQID) { - printk(KERN_WARNING "NFS: v4 server %s " + pr_warn_ratelimited("NFS: v4 server %s " " returned a bad sequence-id error!\n", NFS_SERVER(dir)->nfs_client->cl_hostname); exception.retry = 1; @@ -1893,7 +1909,10 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .state = state, + .inode = inode, + }; int err; do { err = nfs4_handle_exception(server, @@ -2223,11 +2242,12 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, switch (err) { case 0: case -NFS4ERR_WRONGSEC: - break; + goto out; default: err = nfs4_handle_exception(server, err, &exception); } } while (exception.retry); +out: return err; } @@ -3707,8 +3727,11 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, if (task->tk_status >= 0) return 0; switch(task->tk_status) { + case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: + if (state != NULL) + nfs_remove_bad_delegation(state->inode); case -NFS4ERR_OPENMODE: if (state == NULL) break; @@ -4526,7 +4549,9 @@ out: static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .state = state, + }; int err; do { @@ -4619,6 +4644,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) * The show must go on: exit, but mark the * stateid as needing recovery. */ + case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OPENMODE: @@ -5957,21 +5983,22 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) return; switch (task->tk_status) { /* Just ignore these failures */ - case NFS4ERR_DELEG_REVOKED: /* layout was recalled */ - case NFS4ERR_BADIOMODE: /* no IOMODE_RW layout for range */ - case NFS4ERR_BADLAYOUT: /* no layout */ - case NFS4ERR_GRACE: /* loca_recalim always false */ + case -NFS4ERR_DELEG_REVOKED: /* layout was recalled */ + case -NFS4ERR_BADIOMODE: /* no IOMODE_RW layout for range */ + case -NFS4ERR_BADLAYOUT: /* no layout */ + case -NFS4ERR_GRACE: /* loca_recalim always false */ task->tk_status = 0; - } - - if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { - rpc_restart_call_prepare(task); - return; - } - - if (task->tk_status == 0) + break; + case 0: nfs_post_op_update_inode_force_wcc(data->args.inode, data->res.fattr); + break; + default: + if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { + rpc_restart_call_prepare(task); + return; + } + } } static void nfs4_layoutcommit_release(void *calldata) @@ -6074,11 +6101,12 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, case 0: case -NFS4ERR_WRONGSEC: case -NFS4ERR_NOTSUPP: - break; + goto out; default: err = nfs4_handle_exception(server, err, &exception); } } while (exception.retry); +out: return err; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a58eed784aed..66020acf2ecd 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -935,7 +935,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) case -NFS4ERR_BAD_SEQID: if (seqid->sequence->flags & NFS_SEQID_CONFIRMED) return; - printk(KERN_WARNING "NFS: v4 server returned a bad" + pr_warn_ratelimited("NFS: v4 server returned a bad" " sequence-id error on an" " unconfirmed sequence %p!\n", seqid->sequence); @@ -1071,12 +1071,37 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4 { struct nfs_client *clp = server->nfs_client; - if (test_and_clear_bit(NFS_DELEGATED_STATE, &state->flags)) - nfs_async_inode_return_delegation(state->inode, &state->stateid); nfs4_state_mark_reclaim_nograce(clp, state); nfs4_schedule_state_manager(clp); } +void nfs_inode_find_state_and_recover(struct inode *inode, + const nfs4_stateid *stateid) +{ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_open_context *ctx; + struct nfs4_state *state; + bool found = false; + + spin_lock(&inode->i_lock); + list_for_each_entry(ctx, &nfsi->open_files, list) { + state = ctx->state; + if (state == NULL) + continue; + if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) + continue; + if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0) + continue; + nfs4_state_mark_reclaim_nograce(clp, state); + found = true; + } + spin_unlock(&inode->i_lock); + if (found) + nfs4_schedule_state_manager(clp); +} + + static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) { struct inode *inode = state->inode; @@ -1739,7 +1764,7 @@ static void nfs4_state_manager(struct nfs_client *clp) } while (atomic_read(&clp->cl_count) > 1); return; out_error: - printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s" + pr_warn_ratelimited("NFS: state manager failed on NFSv4 server %s" " with error %d\n", clp->cl_hostname, -status); nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index be177f702acb..d6c078ea1489 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -54,7 +54,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, ei->ns_ops = ns_ops; ei->ns = ns; - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (pid_revalidate(dentry, NULL)) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index a6b62173d4c3..53c3bcea56cf 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -188,20 +188,32 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf, static int proc_sys_open(struct inode *inode, struct file *filp) { + struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; + /* sysctl was unregistered */ + if (IS_ERR(head)) + return PTR_ERR(head); + if (table->poll) filp->private_data = proc_sys_poll_event(table->poll); + sysctl_head_finish(head); + return 0; } static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) { struct inode *inode = filp->f_path.dentry->d_inode; + struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; - unsigned long event = (unsigned long)filp->private_data; unsigned int ret = DEFAULT_POLLMASK; + unsigned long event; + + /* sysctl was unregistered */ + if (IS_ERR(head)) + return POLLERR | POLLHUP; if (!table->proc_handler) goto out; @@ -209,6 +221,7 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) if (!table->poll) goto out; + event = (unsigned long)filp->private_data; poll_wait(filp, &table->poll->wait, wait); if (event != atomic_read(&table->poll->event)) { @@ -217,6 +230,8 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) } out: + sysctl_head_finish(head); + return ret; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 7dcd2a250495..3efa7253523e 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -409,6 +409,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, } else { spin_unlock(&walk->mm->page_table_lock); } + + if (pmd_trans_unstable(pmd)) + return 0; /* * The mmap_sem held all the way back in m_start() is what * keeps khugepaged out of here and from collapsing things @@ -507,6 +510,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, struct page *page; split_huge_page_pmd(walk->mm, pmd); + if (pmd_trans_unstable(pmd)) + return 0; pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) { @@ -670,6 +675,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, int err = 0; split_huge_page_pmd(walk->mm, pmd); + if (pmd_trans_unstable(pmd)) + return 0; /* find the first VMA at or above 'addr' */ vma = find_vma(walk->mm, addr); @@ -961,6 +968,8 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, spin_unlock(&walk->mm->page_table_lock); } + if (pmd_trans_unstable(pmd)) + return 0; orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); do { struct page *page = can_gather_numa_stats(*pte, md->vma, addr); diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index deb804bec9df..9db61a41c1ad 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -136,12 +136,13 @@ static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata, u32 *sec void *old_secdata; size_t old_secdata_len; - iattrs = sd->s_iattr; - if (!iattrs) - iattrs = sysfs_init_inode_attrs(sd); - if (!iattrs) - return -ENOMEM; + if (!sd->s_iattr) { + sd->s_iattr = sysfs_init_inode_attrs(sd); + if (!sd->s_iattr) + return -ENOMEM; + } + iattrs = sd->s_iattr; old_secdata = iattrs->ia_secdata; old_secdata_len = iattrs->ia_secdata_len; diff --git a/fs/udf/file.c b/fs/udf/file.c index dca0c3881e82..d567b8448dfc 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -201,12 +201,10 @@ out: static int udf_release_file(struct inode *inode, struct file *filp) { if (filp->f_mode & FMODE_WRITE) { - mutex_lock(&inode->i_mutex); down_write(&UDF_I(inode)->i_data_sem); udf_discard_prealloc(inode); udf_truncate_tail_extent(inode); up_write(&UDF_I(inode)->i_data_sem); - mutex_unlock(&inode->i_mutex); } return 0; } diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 0fa98b1c70ea..cfc4277ebda3 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -353,9 +353,20 @@ xfs_iget_cache_miss( BUG(); } - spin_lock(&pag->pag_ici_lock); + /* + * These values must be set before inserting the inode into the radix + * tree as the moment it is inserted a concurrent lookup (allowed by the + * RCU locking mechanism) can find it and that lookup must see that this + * is an inode currently under construction (i.e. that XFS_INEW is set). + * The ip->i_flags_lock that protects the XFS_INEW flag forms the + * memory barrier that ensures this detection works correctly at lookup + * time. + */ + ip->i_udquot = ip->i_gdquot = NULL; + xfs_iflags_set(ip, XFS_INEW); /* insert the new inode */ + spin_lock(&pag->pag_ici_lock); error = radix_tree_insert(&pag->pag_ici_root, agino, ip); if (unlikely(error)) { WARN_ON(error != -EEXIST); @@ -363,11 +374,6 @@ xfs_iget_cache_miss( error = EAGAIN; goto out_preload_end; } - - /* These values _must_ be set before releasing the radix tree lock! */ - ip->i_udquot = ip->i_gdquot = NULL; - xfs_iflags_set(ip, XFS_INEW); - spin_unlock(&pag->pag_ici_lock); radix_tree_preload_end(); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 541a508adea1..4f5d0ce7854d 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3161,37 +3161,26 @@ xlog_recover_process_iunlinks( */ continue; } + /* + * Unlock the buffer so that it can be acquired in the normal + * course of the transaction to truncate and free each inode. + * Because we are not racing with anyone else here for the AGI + * buffer, we don't even need to hold it locked to read the + * initial unlinked bucket entries out of the buffer. We keep + * buffer reference though, so that it stays pinned in memory + * while we need the buffer. + */ agi = XFS_BUF_TO_AGI(agibp); + xfs_buf_unlock(agibp); for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { agino = be32_to_cpu(agi->agi_unlinked[bucket]); while (agino != NULLAGINO) { - /* - * Release the agi buffer so that it can - * be acquired in the normal course of the - * transaction to truncate and free the inode. - */ - xfs_buf_relse(agibp); - agino = xlog_recover_process_one_iunlink(mp, agno, agino, bucket); - - /* - * Reacquire the agibuffer and continue around - * the loop. This should never fail as we know - * the buffer was good earlier on. - */ - error = xfs_read_agi(mp, NULL, agno, &agibp); - ASSERT(error == 0); - agi = XFS_BUF_TO_AGI(agibp); } } - - /* - * Release the buffer for the current agi so we can - * go on to the next one. - */ - xfs_buf_relse(agibp); + xfs_buf_rele(agibp); } mp->m_dmevmask = mp_dmevmask; |