From 3e8e77367e833de476abd739847ef47b53dced11 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 3 Mar 2014 15:38:32 -0800 Subject: ocfs2: fix quota file corruption commit 15c34a760630ca2c803848fba90ca0646a9907dd upstream. Global quota files are accessed from different nodes. Thus we cannot cache offset of quota structure in the quota file after we drop our node reference count to it because after that moment quota structure may be freed and reallocated elsewhere by a different node resulting in corruption of quota file. Fix the problem by clearing dq_off when we are releasing dquot structure. We also remove the DB_READ_B handling because it is useless - DQ_ACTIVE_B is set iff DQ_READ_B is set. Signed-off-by: Jan Kara Cc: Goldwyn Rodrigues Cc: Joel Becker Reviewed-by: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/quota_global.c | 27 +++++++++++++++++---------- fs/ocfs2/quota_local.c | 4 ---- 2 files changed, 17 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 332a281f217e..e49b4f1cb26b 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -717,6 +717,12 @@ static int ocfs2_release_dquot(struct dquot *dquot) */ if (status < 0) mlog_errno(status); + /* + * Clear dq_off so that we search for the structure in quota file next + * time we acquire it. The structure might be deleted and reallocated + * elsewhere by another node while our dquot structure is on freelist. + */ + dquot->dq_off = 0; clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); out_trans: ocfs2_commit_trans(osb, handle); @@ -756,16 +762,17 @@ static int ocfs2_acquire_dquot(struct dquot *dquot) status = ocfs2_lock_global_qf(info, 1); if (status < 0) goto out; - if (!test_bit(DQ_READ_B, &dquot->dq_flags)) { - status = ocfs2_qinfo_lock(info, 0); - if (status < 0) - goto out_dq; - status = qtree_read_dquot(&info->dqi_gi, dquot); - ocfs2_qinfo_unlock(info, 0); - if (status < 0) - goto out_dq; - } - set_bit(DQ_READ_B, &dquot->dq_flags); + status = ocfs2_qinfo_lock(info, 0); + if (status < 0) + goto out_dq; + /* + * We always want to read dquot structure from disk because we don't + * know what happened with it while it was on freelist. + */ + status = qtree_read_dquot(&info->dqi_gi, dquot); + ocfs2_qinfo_unlock(info, 0); + if (status < 0) + goto out_dq; OCFS2_DQUOT(dquot)->dq_use_count++; OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 27fe7ee4874c..d0f323da0b5c 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -1303,10 +1303,6 @@ int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot) ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh); out: - /* Clear the read bit so that next time someone uses this - * dquot he reads fresh info from disk and allocates local - * dquot structure */ - clear_bit(DQ_READ_B, &dquot->dq_flags); return status; } -- cgit v1.2.3 From 52ed96fc71877b15d1090313cc7b63264bdb466b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 10 Feb 2014 15:18:55 -0500 Subject: ocfs2 syncs the wrong range... commit 1b56e98990bcdbb20b9fab163654b9315bf158e8 upstream. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/file.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ff54014a24ec..46387e49aa46 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2374,8 +2374,8 @@ out_dio: if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || ((file->f_flags & O_DIRECT) && !direct_io)) { - ret = filemap_fdatawrite_range(file->f_mapping, pos, - pos + count - 1); + ret = filemap_fdatawrite_range(file->f_mapping, *ppos, + *ppos + count - 1); if (ret < 0) written = ret; @@ -2388,8 +2388,8 @@ out_dio: } if (!ret) - ret = filemap_fdatawait_range(file->f_mapping, pos, - pos + count - 1); + ret = filemap_fdatawait_range(file->f_mapping, *ppos, + *ppos + count - 1); } /* -- cgit v1.2.3 From 40cb674a5a0cff49a46af150fb862229e545e908 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Mar 2014 22:03:12 -0500 Subject: NFS: Fix a delegation callback race commit 755a48a7a4eb05b9c8424e3017d947b2961a60e0 upstream. The clean-up in commit 36281caa839f ended up removing a NULL pointer check that is needed in order to prevent an Oops in nfs_async_inode_return_delegation(). Reported-by: "Yan, Zheng" Link: http://lkml.kernel.org/r/5313E9F6.2020405@intel.com Fixes: 36281caa839f (NFSv4: Further clean-ups of delegation stateid validation) Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/delegation.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 57db3244f4d9..4b49a8c6ccad 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -656,16 +656,19 @@ int nfs_async_inode_return_delegation(struct inode *inode, rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation == NULL) + goto out_enoent; - if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) { - rcu_read_unlock(); - return -ENOENT; - } + if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) + goto out_enoent; nfs_mark_return_delegation(server, delegation); rcu_read_unlock(); nfs_delegation_run_state_manager(clp); return 0; +out_enoent: + rcu_read_unlock(); + return -ENOENT; } static struct inode * -- cgit v1.2.3 From c8dd8fdf0bd8c858d30ba3889104e226e865cade Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 5 Mar 2014 08:44:23 -0500 Subject: NFSv4: nfs4_stateid_is_current should return 'true' for an invalid stateid commit e1253be0ece1a95a02c7f5843194877471af8179 upstream. When nfs4_set_rw_stateid() can fails by returning EIO to indicate that the stateid is completely invalid, then it makes no sense to have it trigger a retry of the READ or WRITE operation. Instead, we should just have it fall through and attempt a recovery. This fixes an infinite loop in which the client keeps replaying the same bad stateid back to the server. Reported-by: Andy Adamson Link: http://lkml.kernel.org/r/1393954269-3974-1-git-send-email-andros@netapp.com Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 26e71bdb5b33..1ae7dd5956c5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3607,8 +3607,9 @@ static bool nfs4_stateid_is_current(nfs4_stateid *stateid, { nfs4_stateid current_stateid; - if (nfs4_set_rw_stateid(¤t_stateid, ctx, l_ctx, fmode)) - return false; + /* If the current stateid represents a lost lock, then exit */ + if (nfs4_set_rw_stateid(¤t_stateid, ctx, l_ctx, fmode) == -EIO) + return true; return nfs4_stateid_match(stateid, ¤t_stateid); } -- cgit v1.2.3 From 63490f40e071a1b03b6ceca4b654a4d5aeff851d Mon Sep 17 00:00:00 2001 From: Artem Fetishev Date: Mon, 10 Mar 2014 15:49:45 -0700 Subject: fs/proc/base.c: fix GPF in /proc/$PID/map_files commit 70335abb2689c8cd5df91bf2d95a65649addf50b upstream. The expected logic of proc_map_files_get_link() is either to return 0 and initialize 'path' or return an error and leave 'path' uninitialized. By the time dname_to_vma_addr() returns 0 the corresponding vma may have already be gone. In this case the path is not initialized but the return value is still 0. This results in 'general protection fault' inside d_path(). Steps to reproduce: CONFIG_CHECKPOINT_RESTORE=y fd = open(...); while (1) { mmap(fd, ...); munmap(fd, ...); } ls -la /proc/$PID/map_files Addresses https://bugzilla.kernel.org/show_bug.cgi?id=68991 Signed-off-by: Artem Fetishev Signed-off-by: Aleksandr Terekhov Reported-by: Acked-by: Pavel Emelyanov Acked-by: Cyrill Gorcunov Reviewed-by: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/base.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index c3834dad09b3..de12b8128b95 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1825,6 +1825,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path) if (rc) goto out_mmput; + rc = -ENOENT; down_read(&mm->mmap_sem); vma = find_exact_vma(mm, vm_start, vm_end); if (vma && vma->vm_file) { -- cgit v1.2.3 From 0e48f06cf0a1b55cd2dcc432ccfb13174705fcc2 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Sat, 8 Feb 2014 15:47:46 +0000 Subject: Btrfs: fix data corruption when reading/updating compressed extents commit a2aa75e18a21b21952dc6daa9bac7c9f4426f81f upstream. When using a mix of compressed file extents and prealloc extents, it is possible to fill a page of a file with random, garbage data from some unrelated previous use of the page, instead of a sequence of zeroes. A simple sequence of steps to get into such case, taken from the test case I made for xfstests, is: _scratch_mkfs _scratch_mount "-o compress-force=lzo" $XFS_IO_PROG -f -c "pwrite -S 0x06 -b 18670 266978 18670" $SCRATCH_MNT/foobar $XFS_IO_PROG -c "falloc 26450 665194" $SCRATCH_MNT/foobar $XFS_IO_PROG -c "truncate 542872" $SCRATCH_MNT/foobar $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar This results in the following file items in the fs tree: item 4 key (257 INODE_ITEM 0) itemoff 15879 itemsize 160 inode generation 6 transid 6 size 542872 block group 0 mode 100600 item 5 key (257 INODE_REF 256) itemoff 15863 itemsize 16 inode ref index 2 namelen 6 name: foobar item 6 key (257 EXTENT_DATA 0) itemoff 15810 itemsize 53 extent data disk byte 0 nr 0 gen 6 extent data offset 0 nr 24576 ram 266240 extent compression 0 item 7 key (257 EXTENT_DATA 24576) itemoff 15757 itemsize 53 prealloc data disk byte 12849152 nr 241664 gen 6 prealloc data offset 0 nr 241664 item 8 key (257 EXTENT_DATA 266240) itemoff 15704 itemsize 53 extent data disk byte 12845056 nr 4096 gen 6 extent data offset 0 nr 20480 ram 20480 extent compression 2 item 9 key (257 EXTENT_DATA 286720) itemoff 15651 itemsize 53 prealloc data disk byte 13090816 nr 405504 gen 6 prealloc data offset 0 nr 258048 The on disk extent at offset 266240 (which corresponds to 1 single disk block), contains 5 compressed chunks of file data. Each of the first 4 compress 4096 bytes of file data, while the last one only compresses 3024 bytes of file data. Therefore a read into the file region [285648 ; 286720[ (length = 4096 - 3024 = 1072 bytes) should always return zeroes (our next extent is a prealloc one). The solution here is the compression code path to zero the remaining (untouched) bytes of the last page it uncompressed data into, as the information about how much space the file data consumes in the last page is not known in the upper layer fs/btrfs/extent_io.c:__do_readpage(). In __do_readpage we were correctly zeroing the remainder of the page but only if it corresponds to the last page of the inode and if the inode's size is not a multiple of the page size. This would cause not only returning random data on reads, but also permanently storing random data when updating parts of the region that should be zeroed. For the example above, it means updating a single byte in the region [285648 ; 286720[ would store that byte correctly but also store random data on disk. A test case for xfstests follows soon. Signed-off-by: Filipe David Borba Manana Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/compression.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b189bd1e7a3e..ce7067881d36 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1009,6 +1009,8 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, bytes = min(bytes, working_bytes); kaddr = kmap_atomic(page_out); memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); + if (*pg_index == (vcnt - 1) && *pg_offset == 0) + memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); kunmap_atomic(kaddr); flush_dcache_page(page_out); -- cgit v1.2.3 From f1352fb2a38e6d57c9ec7f96c5449db616e04ef2 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Fri, 31 Jan 2014 15:41:58 -0500 Subject: Fix mountpoint reference leakage in linkat commit d22e6338db7f613dd4f6095c190682fcc519e4b7 upstream. Recent changes to retry on ESTALE in linkat (commit 442e31ca5a49e398351b2954b51f578353fdf210) introduced a mountpoint reference leak and a small memory leak in case a filesystem link operation returns ESTALE which is pretty normal for distributed filesystems like lustre, nfs and so on. Free old_path in such a case. [AV: there was another missing path_put() nearby - on the previous goto retry] Signed-off-by: Oleg Drokin: Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index cccaf77e76c5..1211ee5a1cb3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3655,6 +3655,7 @@ retry: out_dput: done_path_create(&new_path, new_dentry); if (retry_estale(error, how)) { + path_put(&old_path); how |= LOOKUP_REVAL; goto retry; } -- cgit v1.2.3 From 68c52c3ef819b289473200e232bb940dbe87fc48 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 21 Jan 2014 20:32:05 -0800 Subject: bio-integrity: Fix bio_integrity_verify segment start bug commit 5837c80e870bc3b12ac6a98cdc9ce7a9522a8fb6 upstream. This patch addresses a bug in bio_integrity_verify() code that has been causing DIF READ verify operations to be silently skipped. The issue is that bio->bi_idx will have been incremented within bio_advance() code in the normal blk_update_request() -> req_bio_endio() completion path, and bio_integrity_verify() is using bio_for_each_segment() which starts the bio segment walk at the current bio->bi_idx. So instead use bio_for_each_segment_all() to always start the bio segment walk from zero, regardless of the current bio->bi_idx value after bio_advance() has been called. (Context change for v3.10.y -> v3.13.y code - nab) Cc: Martin K. Petersen Cc: Jens Axboe Cc: Christoph Hellwig Signed-off-by: Nicholas Bellinger Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/bio-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 8dccf73025b3..433c3b828e1d 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -458,7 +458,7 @@ static int bio_integrity_verify(struct bio *bio) bix.disk_name = bio->bi_bdev->bd_disk->disk_name; bix.sector_size = bi->sector_size; - bio_for_each_segment(bv, bio, i) { + bio_for_each_segment_all(bv, bio, i) { void *kaddr = kmap_atomic(bv->bv_page); bix.data_buf = kaddr + bv->bv_offset; bix.data_size = bv->bv_len; -- cgit v1.2.3 From 0a0ae7b3fb0fb301da83f7c7da38807c76b2b869 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 30 Mar 2014 10:20:01 -0400 Subject: ext4: atomically set inode->i_flags in ext4_set_inode_flags() commit 00a1a053ebe5febcfc2ec498bd894f035ad2aa06 upstream. Use cmpxchg() to atomically set i_flags instead of clearing out the S_IMMUTABLE, S_APPEND, etc. flags and then setting them from the EXT4_IMMUTABLE_FL, EXT4_APPEND_FL flags, since this opens up a race where an immutable file has the immutable flag cleared for a brief window of time. Reported-by: John Sullivan Signed-off-by: "Theodore Ts'o" Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 21dff8f236f6..f9e11df768d5 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "ext4_jbd2.h" #include "xattr.h" @@ -4044,18 +4045,20 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) void ext4_set_inode_flags(struct inode *inode) { unsigned int flags = EXT4_I(inode)->i_flags; + unsigned int new_fl = 0; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); if (flags & EXT4_SYNC_FL) - inode->i_flags |= S_SYNC; + new_fl |= S_SYNC; if (flags & EXT4_APPEND_FL) - inode->i_flags |= S_APPEND; + new_fl |= S_APPEND; if (flags & EXT4_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; + new_fl |= S_IMMUTABLE; if (flags & EXT4_NOATIME_FL) - inode->i_flags |= S_NOATIME; + new_fl |= S_NOATIME; if (flags & EXT4_DIRSYNC_FL) - inode->i_flags |= S_DIRSYNC; + new_fl |= S_DIRSYNC; + set_mask_bits(&inode->i_flags, + S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); } /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ -- cgit v1.2.3 From def52acc90faab583b124f3177d55c15d125e2d1 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 3 Mar 2014 15:38:18 -0800 Subject: mm: close PageTail race commit 668f9abbd4334e6c29fa8acd71635c4f9101caa7 upstream. Commit bf6bddf1924e ("mm: introduce compaction and migration for ballooned pages") introduces page_count(page) into memory compaction which dereferences page->first_page if PageTail(page). This results in a very rare NULL pointer dereference on the aforementioned page_count(page). Indeed, anything that does compound_head(), including page_count() is susceptible to racing with prep_compound_page() and seeing a NULL or dangling page->first_page pointer. This patch uses Andrea's implementation of compound_trans_head() that deals with such a race and makes it the default compound_head() implementation. This includes a read memory barrier that ensures that if PageTail(head) is true that we return a head page that is neither NULL nor dangling. The patch then adds a store memory barrier to prep_compound_page() to ensure page->first_page is set. This is the safest way to ensure we see the head page that we are expecting, PageTail(page) is already in the unlikely() path and the memory barriers are unfortunately required. Hugetlbfs is the exception, we don't enforce a store memory barrier during init since no race is possible. Signed-off-by: David Rientjes Cc: Holger Kiehl Cc: Christoph Lameter Cc: Rafael Aquini Cc: Vlastimil Babka Cc: Michal Hocko Cc: Mel Gorman Cc: Andrea Arcangeli Cc: Rik van Riel Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/page.c b/fs/proc/page.c index b8730d9ebaee..2a8cc94bb641 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -121,7 +121,7 @@ u64 stable_page_flags(struct page *page) * just checks PG_head/PG_tail, so we need to check PageLRU to make * sure a given page is a thp, not a non-huge compound page. */ - else if (PageTransCompound(page) && PageLRU(compound_trans_head(page))) + else if (PageTransCompound(page) && PageLRU(compound_head(page))) u |= 1 << KPF_THP; /* -- cgit v1.2.3 From 39305a6ac73ca6e8349773d032cdb5336d42196f Mon Sep 17 00:00:00 2001 From: Derek Basehore Date: Thu, 3 Apr 2014 14:46:22 -0700 Subject: backing_dev: fix hung task on sync commit 6ca738d60c563d5c6cf6253ee4b8e76fa77b2b9e upstream. bdi_wakeup_thread_delayed() used the mod_delayed_work() function to schedule work to writeback dirty inodes. The problem with this is that it can delay work that is scheduled for immediate execution, such as the work from sync_inodes_sb(). This can happen since mod_delayed_work() can now steal work from a work_queue. This fixes the problem by using queue_delayed_work() instead. This is a regression caused by commit 839a8e8660b6 ("writeback: replace custom worker pool implementation with unbound workqueue"). The reason that this causes a problem is that laptop-mode will change the delay, dirty_writeback_centisecs, to 60000 (10 minutes) by default. In the case that bdi_wakeup_thread_delayed() races with sync_inodes_sb(), sync will be stopped for 10 minutes and trigger a hung task. Even if dirty_writeback_centisecs is not long enough to cause a hung task, we still don't want to delay sync for that long. We fix the problem by using queue_delayed_work() when we want to schedule writeback sometime in future. This function doesn't change the timer if it is already armed. For the same reason, we also change bdi_writeback_workfn() to immediately queue the work again in the case that the work_list is not empty. The same problem can happen if the sync work is run on the rescue worker. [jack@suse.cz: update changelog, add comment, use bdi_wakeup_thread_delayed()] Signed-off-by: Derek Basehore Reviewed-by: Jan Kara Cc: Alexander Viro Reviewed-by: Tejun Heo Cc: Greg Kroah-Hartman Cc: "Darrick J. Wong" Cc: Derek Basehore Cc: Kees Cook Cc: Benson Leung Cc: Sonny Rao Cc: Luigi Semenzato Cc: Jens Axboe Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/fs-writeback.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e3ab1e4dc442..f79f641de4ff 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1038,10 +1038,10 @@ void bdi_writeback_workfn(struct work_struct *work) trace_writeback_pages_written(pages_written); } - if (!list_empty(&bdi->work_list) || - (wb_has_dirty_io(wb) && dirty_writeback_interval)) - queue_delayed_work(bdi_wq, &wb->dwork, - msecs_to_jiffies(dirty_writeback_interval * 10)); + if (!list_empty(&bdi->work_list)) + mod_delayed_work(bdi_wq, &wb->dwork, 0); + else if (wb_has_dirty_io(wb) && dirty_writeback_interval) + bdi_wakeup_thread_delayed(bdi); current->flags &= ~PF_SWAPWRITE; } -- cgit v1.2.3 From bf0972039ddc483a9cb79edae73076c635876568 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 3 Apr 2014 14:46:23 -0700 Subject: bdi: avoid oops on device removal commit 5acda9d12dcf1ad0d9a5a2a7c646de3472fa7555 upstream. After commit 839a8e8660b6 ("writeback: replace custom worker pool implementation with unbound workqueue") when device is removed while we are writing to it we crash in bdi_writeback_workfn() -> set_worker_desc() because bdi->dev is NULL. This can happen because even though bdi_unregister() cancels all pending flushing work, nothing really prevents new ones from being queued from balance_dirty_pages() or other places. Fix the problem by clearing BDI_registered bit in bdi_unregister() and checking it before scheduling of any flushing work. Fixes: 839a8e8660b6777e7fe4e80af1a048aebe2b5977 Reviewed-by: Tejun Heo Signed-off-by: Jan Kara Cc: Derek Basehore Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/fs-writeback.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index f79f641de4ff..387213ac2608 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -87,16 +87,29 @@ static inline struct inode *wb_inode(struct list_head *head) #define CREATE_TRACE_POINTS #include +static void bdi_wakeup_thread(struct backing_dev_info *bdi) +{ + spin_lock_bh(&bdi->wb_lock); + if (test_bit(BDI_registered, &bdi->state)) + mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); + spin_unlock_bh(&bdi->wb_lock); +} + static void bdi_queue_work(struct backing_dev_info *bdi, struct wb_writeback_work *work) { trace_writeback_queue(bdi, work); spin_lock_bh(&bdi->wb_lock); + if (!test_bit(BDI_registered, &bdi->state)) { + if (work->done) + complete(work->done); + goto out_unlock; + } list_add_tail(&work->list, &bdi->work_list); - spin_unlock_bh(&bdi->wb_lock); - mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); +out_unlock: + spin_unlock_bh(&bdi->wb_lock); } static void @@ -112,7 +125,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) { trace_writeback_nowork(bdi); - mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); + bdi_wakeup_thread(bdi); return; } @@ -159,7 +172,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi) * writeback as soon as there is no other work to do. */ trace_writeback_wake_background(bdi); - mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); + bdi_wakeup_thread(bdi); } /* @@ -1016,7 +1029,7 @@ void bdi_writeback_workfn(struct work_struct *work) current->flags |= PF_SWAPWRITE; if (likely(!current_is_workqueue_rescuer() || - list_empty(&bdi->bdi_list))) { + !test_bit(BDI_registered, &bdi->state))) { /* * The normal path. Keep writing back @bdi until its * work_list is empty. Note that this path is also taken -- cgit v1.2.3 From 19fc37ed7b9e8cdde28597adb9714d6f863985b4 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Wed, 5 Feb 2014 16:34:38 +0900 Subject: Btrfs: skip submitting barrier for missing device commit f88ba6a2a44ee98e8d59654463dc157bb6d13c43 upstream. I got an error on v3.13: BTRFS error (device sdf1) in write_all_supers:3378: errno=-5 IO failure (errors while submitting device barriers.) how to reproduce: > mkfs.btrfs -f -d raid1 /dev/sdf1 /dev/sdf2 > wipefs -a /dev/sdf2 > mount -o degraded /dev/sdf1 /mnt > btrfs balance start -f -sconvert=single -mconvert=single -dconvert=single /mnt The reason of the error is that barrier_all_devices() failed to submit barrier to the missing device. However it is clear that we cannot do anything on missing device, and also it is not necessary to care chunks on the missing device. This patch stops sending/waiting barrier if device is missing. Signed-off-by: Hidetoshi Seto Signed-off-by: Josef Bacik Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b8b60b660c8f..4354b9127713 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3161,6 +3161,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info) /* send down all the barriers */ head = &info->fs_devices->devices; list_for_each_entry_rcu(dev, head, dev_list) { + if (dev->missing) + continue; if (!dev->bdev) { errors_send++; continue; @@ -3175,6 +3177,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info) /* wait for all the barriers */ list_for_each_entry_rcu(dev, head, dev_list) { + if (dev->missing) + continue; if (!dev->bdev) { errors_wait++; continue; -- cgit v1.2.3 From cbe099ed98247f60f18a828edd1b0bd07560b460 Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Wed, 19 Feb 2014 18:52:39 -0500 Subject: ext4: fix error return from ext4_ext_handle_uninitialized_extents() commit ce37c42919608e96ade3748fe23c3062a0a966c5 upstream. Commit 3779473246 breaks the return of error codes from ext4_ext_handle_uninitialized_extents() in ext4_ext_map_blocks(). A portion of the patch assigns that function's signed integer return value to an unsigned int. Consequently, negatively valued error codes are lost and can be treated as a bogus allocated block count. Signed-off-by: Eric Whitney Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a2b625e279db..141d0fce318d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4032,7 +4032,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, struct ext4_extent newex, *ex, *ex2; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); ext4_fsblk_t newblock = 0; - int free_on_err = 0, err = 0, depth; + int free_on_err = 0, err = 0, depth, ret; unsigned int allocated = 0, offset = 0; unsigned int allocated_clusters = 0; struct ext4_allocation_request ar; @@ -4093,9 +4093,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, if (!ext4_ext_is_uninitialized(ex)) goto out; - allocated = ext4_ext_handle_uninitialized_extents( + ret = ext4_ext_handle_uninitialized_extents( handle, inode, map, path, flags, allocated, newblock); + if (ret < 0) + err = ret; + else + allocated = ret; goto out3; } } -- cgit v1.2.3 From 8459d9c808d91f2170e83ac45fef6225392d98db Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Thu, 13 Mar 2014 23:34:16 -0400 Subject: ext4: fix partial cluster handling for bigalloc file systems commit c06344939422bbd032ac967223a7863de57496b5 upstream. Commit 9cb00419fa, which enables hole punching for bigalloc file systems, exposed a bug introduced by commit 6ae06ff51e in an earlier release. When run on a bigalloc file system, xfstests generic/013, 068, 075, 083, 091, 100, 112, 127, 263, 269, and 270 fail with e2fsck errors or cause kernel error messages indicating that previously freed blocks are being freed again. The latter commit optimizes the selection of the starting extent in ext4_ext_rm_leaf() when hole punching by beginning with the extent supplied in the path argument rather than with the last extent in the leaf node (as is still done when truncating). However, the code in rm_leaf that initially sets partial_cluster to track cluster sharing on extent boundaries is only guaranteed to run if rm_leaf starts with the last node in the leaf. Consequently, partial_cluster is not correctly initialized when hole punching, and a cluster on the boundary of a punched region that should be retained may instead be deallocated. Signed-off-by: Eric Whitney Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 141d0fce318d..84d817b842a8 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2511,6 +2511,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ex_ee_block = le32_to_cpu(ex->ee_block); ex_ee_len = ext4_ext_get_actual_len(ex); + /* + * If we're starting with an extent other than the last one in the + * node, we need to see if it shares a cluster with the extent to + * the right (towards the end of the file). If its leftmost cluster + * is this extent's rightmost cluster and it is not cluster aligned, + * we'll mark it as a partial that is not to be deallocated. + */ + + if (ex != EXT_LAST_EXTENT(eh)) { + ext4_fsblk_t current_pblk, right_pblk; + long long current_cluster, right_cluster; + + current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1; + current_cluster = (long long)EXT4_B2C(sbi, current_pblk); + right_pblk = ext4_ext_pblock(ex + 1); + right_cluster = (long long)EXT4_B2C(sbi, right_pblk); + if (current_cluster == right_cluster && + EXT4_PBLK_COFF(sbi, right_pblk)) + *partial_cluster = -right_cluster; + } + trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster); while (ex >= EXT_FIRST_EXTENT(eh) && -- cgit v1.2.3 From 5734144cd1e407869063a82d9c741e1092c354db Mon Sep 17 00:00:00 2001 From: Kamlakant Patel Date: Mon, 6 Jan 2014 19:06:54 +0530 Subject: jffs2: Fix segmentation fault found in stress test commit 3367da5610c50e6b83f86d366d72b41b350b06a2 upstream. Creating a large file on a JFFS2 partition sometimes crashes with this call trace: [ 306.476000] CPU 13 Unable to handle kernel paging request at virtual address c0000000dfff8002, epc == ffffffffc03a80a8, ra == ffffffffc03a8044 [ 306.488000] Oops[#1]: [ 306.488000] Cpu 13 [ 306.492000] $ 0 : 0000000000000000 0000000000000000 0000000000008008 0000000000008007 [ 306.500000] $ 4 : c0000000dfff8002 000000000000009f c0000000e0007cde c0000000ee95fa58 [ 306.508000] $ 8 : 0000000000000001 0000000000008008 0000000000010000 ffffffffffff8002 [ 306.516000] $12 : 0000000000007fa9 000000000000ff0e 000000000000ff0f 80e55930aebb92bb [ 306.524000] $16 : c0000000e0000000 c0000000ee95fa5c c0000000efc80000 ffffffffc09edd70 [ 306.532000] $20 : ffffffffc2b60000 c0000000ee95fa58 0000000000000000 c0000000efc80000 [ 306.540000] $24 : 0000000000000000 0000000000000004 [ 306.548000] $28 : c0000000ee950000 c0000000ee95f738 0000000000000000 ffffffffc03a8044 [ 306.556000] Hi : 00000000000574a5 [ 306.560000] Lo : 6193b7a7e903d8c9 [ 306.564000] epc : ffffffffc03a80a8 jffs2_rtime_compress+0x98/0x198 [ 306.568000] Tainted: G W [ 306.572000] ra : ffffffffc03a8044 jffs2_rtime_compress+0x34/0x198 [ 306.580000] Status: 5000f8e3 KX SX UX KERNEL EXL IE [ 306.584000] Cause : 00800008 [ 306.588000] BadVA : c0000000dfff8002 [ 306.592000] PrId : 000c1100 (Netlogic XLP) [ 306.596000] Modules linked in: [ 306.596000] Process dd (pid: 170, threadinfo=c0000000ee950000, task=c0000000ee6e0858, tls=0000000000c47490) [ 306.608000] Stack : 7c547f377ddc7ee4 7ffc7f967f5d7fae 7f617f507fc37ff4 7e7d7f817f487f5f 7d8e7fec7ee87eb3 7e977ff27eec7f9e 7d677ec67f917f67 7f3d7e457f017ed7 7fd37f517f867eb2 7fed7fd17ca57e1d 7e5f7fe87f257f77 7fd77f0d7ede7fdb 7fba7fef7e197f99 7fde7fe07ee37eb5 7f5c7f8c7fc67f65 7f457fb87f847e93 7f737f3e7d137cd9 7f8e7e9c7fc47d25 7dbb7fac7fb67e52 7ff17f627da97f64 7f6b7df77ffa7ec5 80057ef17f357fb3 7f767fa27dfc7fd5 7fe37e8e7fd07e53 7e227fcf7efb7fa1 7f547e787fa87fcc 7fcb7fc57f5a7ffb 7fc07f6c7ea97e80 7e2d7ed17e587ee0 7fb17f9d7feb7f31 7f607e797e887faa 7f757fdd7c607ff3 7e877e657ef37fbd 7ec17fd67fe67ff7 7ff67f797ff87dc4 7eef7f3a7c337fa6 7fe57fc97ed87f4b 7ebe7f097f0b8003 7fe97e2a7d997cba 7f587f987f3c7fa9 ... [ 306.676000] Call Trace: [ 306.680000] [] jffs2_rtime_compress+0x98/0x198 [ 306.684000] [] jffs2_selected_compress+0x110/0x230 [ 306.692000] [] jffs2_compress+0x5c/0x388 [ 306.696000] [] jffs2_write_inode_range+0xd8/0x388 [ 306.704000] [] jffs2_write_end+0x16c/0x2d0 [ 306.708000] [] generic_file_buffered_write+0xf8/0x2b8 [ 306.716000] [] __generic_file_aio_write+0x1ac/0x350 [ 306.720000] [] generic_file_aio_write+0x80/0x168 [ 306.728000] [] do_sync_write+0x94/0xf8 [ 306.732000] [] vfs_write+0xa4/0x1a0 [ 306.736000] [] SyS_write+0x50/0x90 [ 306.744000] [] handle_sys+0x180/0x1a0 [ 306.748000] [ 306.748000] Code: 020b202d 0205282d 90a50000 <90840000> 14a40038 00000000 0060602d 0000282d 016c5823 [ 306.760000] ---[ end trace 79dd088435be02d0 ]--- Segmentation fault This crash is caused because the 'positions' is declared as an array of signed short. The value of position is in the range 0..65535, and will be converted to a negative number when the position is greater than 32767 and causes a corruption and crash. Changing the definition to 'unsigned short' fixes this issue Signed-off-by: Jayachandran C Signed-off-by: Kamlakant Patel Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- fs/jffs2/compr_rtime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c index 16a5047903a6..406d9cc84ba8 100644 --- a/fs/jffs2/compr_rtime.c +++ b/fs/jffs2/compr_rtime.c @@ -33,7 +33,7 @@ static int jffs2_rtime_compress(unsigned char *data_in, unsigned char *cpage_out, uint32_t *sourcelen, uint32_t *dstlen) { - short positions[256]; + unsigned short positions[256]; int outpos = 0; int pos=0; @@ -74,7 +74,7 @@ static int jffs2_rtime_decompress(unsigned char *data_in, unsigned char *cpage_out, uint32_t srclen, uint32_t destlen) { - short positions[256]; + unsigned short positions[256]; int outpos = 0; int pos=0; -- cgit v1.2.3 From 16668c2b1a12ae83427f78b2be165865d782df0b Mon Sep 17 00:00:00 2001 From: Ajesh Kunhipurayil Vijayan Date: Mon, 6 Jan 2014 19:06:55 +0530 Subject: jffs2: Fix crash due to truncation of csize commit 41bf1a24c1001f4d0d41a78e1ac575d2f14789d7 upstream. mounting JFFS2 partition sometimes crashes with this call trace: [ 1322.240000] Kernel bug detected[#1]: [ 1322.244000] Cpu 2 [ 1322.244000] $ 0 : 0000000000000000 0000000000000018 000000003ff00070 0000000000000001 [ 1322.252000] $ 4 : 0000000000000000 c0000000f3980150 0000000000000000 0000000000010000 [ 1322.260000] $ 8 : ffffffffc09cd5f8 0000000000000001 0000000000000088 c0000000ed300de8 [ 1322.268000] $12 : e5e19d9c5f613a45 ffffffffc046d464 0000000000000000 66227ba5ea67b74e [ 1322.276000] $16 : c0000000f1769c00 c0000000ed1e0200 c0000000f3980150 0000000000000000 [ 1322.284000] $20 : c0000000f3a80000 00000000fffffffc c0000000ed2cfbd8 c0000000f39818f0 [ 1322.292000] $24 : 0000000000000004 0000000000000000 [ 1322.300000] $28 : c0000000ed2c0000 c0000000ed2cfab8 0000000000010000 ffffffffc039c0b0 [ 1322.308000] Hi : 000000000000023c [ 1322.312000] Lo : 000000000003f802 [ 1322.316000] epc : ffffffffc039a9f8 check_tn_node+0x88/0x3b0 [ 1322.320000] Not tainted [ 1322.324000] ra : ffffffffc039c0b0 jffs2_do_read_inode_internal+0x1250/0x1e48 [ 1322.332000] Status: 5400f8e3 KX SX UX KERNEL EXL IE [ 1322.336000] Cause : 00800034 [ 1322.340000] PrId : 000c1004 (Netlogic XLP) [ 1322.344000] Modules linked in: [ 1322.348000] Process jffs2_gcd_mtd7 (pid: 264, threadinfo=c0000000ed2c0000, task=c0000000f0e68dd8, tls=0000000000000000) [ 1322.356000] Stack : c0000000f1769e30 c0000000ed010780 c0000000ed010780 c0000000ed300000 c0000000f1769c00 c0000000f3980150 c0000000f3a80000 00000000fffffffc c0000000ed2cfbd8 ffffffffc039c0b0 ffffffffc09c6340 0000000000001000 0000000000000dec ffffffffc016c9d8 c0000000f39805a0 c0000000f3980180 0000008600000000 0000000000000000 0000000000000000 0000000000000000 0001000000000dec c0000000f1769d98 c0000000ed2cfb18 0000000000010000 0000000000010000 0000000000000044 c0000000f3a80000 c0000000f1769c00 c0000000f3d207a8 c0000000f1769d98 c0000000f1769de0 ffffffffc076f9c0 0000000000000009 0000000000000000 0000000000000000 ffffffffc039cf90 0000000000000017 ffffffffc013fbdc 0000000000000001 000000010003e61c ... [ 1322.424000] Call Trace: [ 1322.428000] [] check_tn_node+0x88/0x3b0 [ 1322.432000] [] jffs2_do_read_inode_internal+0x1250/0x1e48 [ 1322.440000] [] jffs2_do_crccheck_inode+0x70/0xd0 [ 1322.448000] [] jffs2_garbage_collect_pass+0x160/0x870 [ 1322.452000] [] jffs2_garbage_collect_thread+0xdc/0x1f0 [ 1322.460000] [] kthread+0xb8/0xc0 [ 1322.464000] [] kernel_thread_helper+0x10/0x18 [ 1322.472000] [ 1322.472000] Code: 67bd0050 94a4002c 2c830001 <00038036> de050218 2403fffc 0080a82d 00431824 24630044 [ 1322.480000] ---[ end trace b052bb90e97dfbf5 ]--- The variable csize in structure jffs2_tmp_dnode_info is of type uint16_t, but it is used to hold the compressed data length(csize) which is declared as uint32_t. So, when the value of csize exceeds 16bits, it gets truncated when assigned to tn->csize. This is causing a kernel BUG. Changing the definition of csize in jffs2_tmp_dnode_info to uint32_t fixes the issue. Signed-off-by: Ajesh Kunhipurayil Vijayan Signed-off-by: Kamlakant Patel Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- fs/jffs2/nodelist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index e4619b00f7c5..fa35ff79ab35 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h @@ -231,7 +231,7 @@ struct jffs2_tmp_dnode_info uint32_t version; uint32_t data_crc; uint32_t partial_crc; - uint16_t csize; + uint32_t csize; uint16_t overlapped; }; -- cgit v1.2.3 From 3a196f46fc0d37392ba686df66b94a1edc0b8523 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 12 Feb 2014 12:44:56 -0800 Subject: jffs2: avoid soft-lockup in jffs2_reserve_space_gc() commit 13b546d96207c131eeae15dc7b26c6e7d0f1cad7 upstream. We triggered soft-lockup under stress test on 2.6.34 kernel. BUG: soft lockup - CPU#1 stuck for 60009ms! [lockf2.test:14488] ... [] (jffs2_do_reserve_space+0x420/0x440 [jffs2]) [] (jffs2_reserve_space_gc+0x34/0x78 [jffs2]) [] (jffs2_garbage_collect_dnode.isra.3+0x264/0x478 [jffs2]) [] (jffs2_garbage_collect_pass+0x9c0/0xe4c [jffs2]) [] (jffs2_reserve_space+0x104/0x2a8 [jffs2]) [] (jffs2_write_inode_range+0x5c/0x4d4 [jffs2]) [] (jffs2_write_end+0x198/0x2c0 [jffs2]) [] (generic_file_buffered_write+0x158/0x200) [] (__generic_file_aio_write+0x3a4/0x414) [] (generic_file_aio_write+0x5c/0xbc) [] (do_sync_write+0x98/0xd4) [] (vfs_write+0xa8/0x150) [] (sys_write+0x3c/0xc0)] Fix this by adding a cond_resched() in the while loop. [akpm@linux-foundation.org: don't initialize `ret'] Signed-off-by: Li Zefan Cc: David Woodhouse Cc: Artem Bityutskiy Signed-off-by: Andrew Morton Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- fs/jffs2/nodemgmt.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index 03310721712f..41789e6fa6fe 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c @@ -211,20 +211,25 @@ out: int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *len, uint32_t sumsize) { - int ret = -EAGAIN; + int ret; minsize = PAD(minsize); jffs2_dbg(1, "%s(): Requested 0x%x bytes\n", __func__, minsize); - spin_lock(&c->erase_completion_lock); - while(ret == -EAGAIN) { + while (true) { + spin_lock(&c->erase_completion_lock); ret = jffs2_do_reserve_space(c, minsize, len, sumsize); if (ret) { jffs2_dbg(1, "%s(): looping, ret is %d\n", __func__, ret); } + spin_unlock(&c->erase_completion_lock); + + if (ret == -EAGAIN) + cond_resched(); + else + break; } - spin_unlock(&c->erase_completion_lock); if (!ret) ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1); -- cgit v1.2.3 From cc8ece834baaac84cffafad31dc25c1f73ef6add Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 12 Feb 2014 12:44:57 -0800 Subject: jffs2: remove from wait queue after schedule() commit 3ead9578443b66ddb3d50ed4f53af8a0c0298ec5 upstream. @wait is a local variable, so if we don't remove it from the wait queue list, later wake_up() may end up accessing invalid memory. This was spotted by eyes. Signed-off-by: Li Zefan Cc: David Woodhouse Cc: Artem Bityutskiy Signed-off-by: Andrew Morton Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- fs/jffs2/nodemgmt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index 41789e6fa6fe..b6bd4affd9ad 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c @@ -179,6 +179,7 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, spin_unlock(&c->erase_completion_lock); schedule(); + remove_wait_queue(&c->erase_wait, &wait); } else spin_unlock(&c->erase_completion_lock); } else if (ret) -- cgit v1.2.3 From 0d267970ad9fb54e59f080d8ff80bcd1cd3a0039 Mon Sep 17 00:00:00 2001 From: Mark Tinguely Date: Fri, 4 Apr 2014 07:10:49 +1100 Subject: xfs: fix directory hash ordering bug commit c88547a8119e3b581318ab65e9b72f27f23e641d upstream. Commit f5ea1100 ("xfs: add CRCs to dir2/da node blocks") introduced in 3.10 incorrectly converted the btree hash index array pointer in xfs_da3_fixhashpath(). It resulted in the the current hash always being compared against the first entry in the btree rather than the current block index into the btree block's hash entry array. As a result, it was comparing the wrong hashes, and so could misorder the entries in the btree. For most cases, this doesn't cause any problems as it requires hash collisions to expose the ordering problem. However, when there are hash collisions within a directory there is a very good probability that the entries will be ordered incorrectly and that actually matters when duplicate hashes are placed into or removed from the btree block hash entry array. This bug results in an on-disk directory corruption and that results in directory verifier functions throwing corruption warnings into the logs. While no data or directory entries are lost, access to them may be compromised, and attempts to remove entries from a directory that has suffered from this corruption may result in a filesystem shutdown. xfs_repair will fix the directory hash ordering without data loss occuring. [dchinner: wrote useful a commit message] Reported-by: Hannes Frederic Sowa Signed-off-by: Mark Tinguely Reviewed-by: Ben Myers Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_da_btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index eca6f9d8a263..79ddbaf93206 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -1334,7 +1334,7 @@ xfs_da3_fixhashpath( node = blk->bp->b_addr; xfs_da3_node_hdr_from_disk(&nodehdr, node); btree = xfs_da3_node_tree_p(node); - if (be32_to_cpu(btree->hashval) == lasthash) + if (be32_to_cpu(btree[blk->index].hashval) == lasthash) break; blk->hashval = lasthash; btree[blk->index].hashval = cpu_to_be32(lasthash); -- cgit v1.2.3 From c5393b70f9229b3e68146c84888cbfc8be002d72 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 28 Jan 2014 16:01:04 -0500 Subject: nfsd4: session needs room for following op to error out commit 4c69d5855a16f7378648c5733632628fa10431db upstream. Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4proc.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 27d74a294515..e0bfacd23a8f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1307,6 +1307,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, /* If op is non-idempotent */ if (opdesc->op_flags & OP_MODIFIES_SOMETHING) { plen = opdesc->op_rsize_bop(rqstp, op); + /* + * If there's still another operation, make sure + * we'll have space to at least encode an error: + */ + if (resp->opcnt < args->opcnt) + plen += COMPOUND_ERR_SLACK_SPACE; op->status = nfsd4_check_resp_size(resp, plen); } -- cgit v1.2.3 From f4a0a32b202fd49d255bfc689828f0a79d364866 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 28 Jan 2014 16:05:15 -0500 Subject: nfsd4: buffer-length check for SUPPATTR_EXCLCREAT commit de3997a7eeb9ea286b15879fdf8a95aae065b4f7 upstream. This was an omission from 8c18f2052e756e7d5dea712fc6e7ed70c00e8a39 "nfsd41: SUPPATTR_EXCLCREAT attribute". Cc: Benny Halevy Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 582321a978b0..d7ea1938cdee 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2401,6 +2401,8 @@ out_acl: WRITE64(stat.ino); } if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { + if ((buflen -= 16) < 0) + goto out_resource; WRITE32(3); WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1); -- cgit v1.2.3 From 5aad7571c2cf4366b1b80c7d22446458fe75f629 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 3 Feb 2014 16:31:42 -0500 Subject: nfsd4: fix test_stateid error reply encoding commit a11fcce1544df08c723d950ff0edef3adac40405 upstream. If the entire operation fails then there's nothing to encode. Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index d7ea1938cdee..5188a38fef06 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3384,6 +3384,9 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_test_stateid_id *stateid, *next; __be32 *p; + if (nfserr) + return nfserr; + RESERVE_SPACE(4 + (4 * test_stateid->ts_num_ids)); *p++ = htonl(test_stateid->ts_num_ids); -- cgit v1.2.3 From b57d9c3fe362d805eb848098b7bb994a3b718ebf Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 24 Feb 2014 14:59:47 -0500 Subject: nfsd: notify_change needs elevated write count commit 9f67f189939eccaa54f3d2c9cf10788abaf2d584 upstream. Looks like this bug has been here since these write counts were introduced, not sure why it was just noticed now. Thanks also to Jan Kara for pointing out the problem. Reported-by: Matthew Rahtz Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/vfs.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 62fd6616801d..d9b298cbfe5c 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -406,6 +406,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, umode_t ftype = 0; __be32 err; int host_err; + bool get_write_count; int size_change = 0; if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) @@ -413,10 +414,18 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, if (iap->ia_valid & ATTR_SIZE) ftype = S_IFREG; + /* Callers that do fh_verify should do the fh_want_write: */ + get_write_count = !fhp->fh_dentry; + /* Get inode */ err = fh_verify(rqstp, fhp, ftype, accmode); if (err) goto out; + if (get_write_count) { + host_err = fh_want_write(fhp); + if (host_err) + return nfserrno(host_err); + } dentry = fhp->fh_dentry; inode = dentry->d_inode; -- cgit v1.2.3 From 344f3a6bce11dd3d483d7e36dce33b22427ff621 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 26 Feb 2014 16:50:01 +0300 Subject: nfsd: check passed socket's net matches NFSd superblock's one commit 3064639423c48d6e0eb9ecc27c512a58e38c6c57 upstream. There could be a case, when NFSd file system is mounted in network, different to socket's one, like below: "ip netns exec" creates new network and mount namespace, which duplicates NFSd mount point, created in init_net context. And thus NFS server stop in nested network context leads to RPCBIND client destruction in init_net. Then, on NFSd start in nested network context, rpc.nfsd process creates socket in nested net and passes it into "write_ports", which leads to RPCBIND sockets creation in init_net context because of the same reason (NFSd monut point was created in init_net context). An attempt to register passed socket in nested net leads to panic, because no RPCBIND client present in nexted network namespace. This patch add check that passed socket's net matches NFSd superblock's one. And returns -EINVAL error to user psace otherwise. v2: Put socket on exit. Reported-by: Weng Meiling Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfsctl.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 7f555179bf81..f34d9de802ab 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -699,6 +699,11 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net) if (err != 0 || fd < 0) return -EINVAL; + if (svc_alien_sock(net, fd)) { + printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__); + return -EINVAL; + } + err = nfsd_create_serv(net); if (err != 0) return err; -- cgit v1.2.3 From 83caff8d1740d76609a9b1bc55247d17be44d85f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 10 Mar 2014 14:17:55 -0400 Subject: nfsd4: fix setclientid encode size commit 480efaee085235bb848f1063f959bf144103c342 upstream. Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e0bfacd23a8f..f7d7d04674fb 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1477,7 +1477,8 @@ static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { - return (op_encode_hdr_size + 2 + 1024) * sizeof(__be32); + return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) * + sizeof(__be32); } static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) -- cgit v1.2.3 From d8639f243850400d0e64b2b626903b0bcef925d2 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 26 Mar 2014 22:09:30 +0800 Subject: NFSD: Traverse unconfirmed client through hash-table commit 2b9056359889c78ea5decb5b654a512c2e8a945c upstream. When stopping nfsd, I got BUG messages, and soft lockup messages, The problem is cuased by double rb_erase() in nfs4_state_destroy_net() and destroy_client(). This patch just let nfsd traversing unconfirmed client through hash-table instead of rbtree. [ 2325.021995] BUG: unable to handle kernel NULL pointer dereference at (null) [ 2325.022809] IP: [] rb_erase+0x14c/0x390 [ 2325.022982] PGD 7a91b067 PUD 7a33d067 PMD 0 [ 2325.022982] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC [ 2325.022982] Modules linked in: nfsd(OF) cfg80211 rfkill bridge stp llc snd_intel8x0 snd_ac97_codec ac97_bus auth_rpcgss nfs_acl serio_raw e1000 i2c_piix4 ppdev snd_pcm snd_timer lockd pcspkr joydev parport_pc snd parport i2c_core soundcore microcode sunrpc ata_generic pata_acpi [last unloaded: nfsd] [ 2325.022982] CPU: 1 PID: 2123 Comm: nfsd Tainted: GF O 3.14.0-rc8+ #2 [ 2325.022982] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 2325.022982] task: ffff88007b384800 ti: ffff8800797f6000 task.ti: ffff8800797f6000 [ 2325.022982] RIP: 0010:[] [] rb_erase+0x14c/0x390 [ 2325.022982] RSP: 0018:ffff8800797f7d98 EFLAGS: 00010246 [ 2325.022982] RAX: ffff880079c1f010 RBX: ffff880079f4c828 RCX: 0000000000000000 [ 2325.022982] RDX: 0000000000000000 RSI: ffff880079bcb070 RDI: ffff880079f4c810 [ 2325.022982] RBP: ffff8800797f7d98 R08: 0000000000000000 R09: ffff88007964fc70 [ 2325.022982] R10: 0000000000000000 R11: 0000000000000400 R12: ffff880079f4c800 [ 2325.022982] R13: ffff880079bcb000 R14: ffff8800797f7da8 R15: ffff880079f4c860 [ 2325.022982] FS: 0000000000000000(0000) GS:ffff88007f900000(0000) knlGS:0000000000000000 [ 2325.022982] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 2325.022982] CR2: 0000000000000000 CR3: 000000007a3ef000 CR4: 00000000000006e0 [ 2325.022982] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 2325.022982] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 2325.022982] Stack: [ 2325.022982] ffff8800797f7de0 ffffffffa0191c6e ffff8800797f7da8 ffff8800797f7da8 [ 2325.022982] ffff880079f4c810 ffff880079bcb000 ffffffff81cc26c0 ffff880079c1f010 [ 2325.022982] ffff880079bcb070 ffff8800797f7e28 ffffffffa01977f2 ffff8800797f7df0 [ 2325.022982] Call Trace: [ 2325.022982] [] destroy_client+0x32e/0x3b0 [nfsd] [ 2325.022982] [] nfs4_state_shutdown_net+0x1a2/0x220 [nfsd] [ 2325.022982] [] nfsd_shutdown_net+0x38/0x70 [nfsd] [ 2325.022982] [] nfsd_last_thread+0x4e/0x80 [nfsd] [ 2325.022982] [] svc_shutdown_net+0x2b/0x30 [sunrpc] [ 2325.022982] [] nfsd_destroy+0x5b/0x80 [nfsd] [ 2325.022982] [] nfsd+0x103/0x130 [nfsd] [ 2325.022982] [] ? nfsd_destroy+0x80/0x80 [nfsd] [ 2325.022982] [] kthread+0xd2/0xf0 [ 2325.022982] [] ? insert_kthread_work+0x40/0x40 [ 2325.022982] [] ret_from_fork+0x7c/0xb0 [ 2325.022982] [] ? insert_kthread_work+0x40/0x40 [ 2325.022982] Code: 48 83 e1 fc 48 89 10 0f 84 02 01 00 00 48 3b 41 10 0f 84 08 01 00 00 48 89 51 08 48 89 fa e9 74 ff ff ff 0f 1f 40 00 48 8b 50 10 02 01 0f 84 93 00 00 00 48 8b 7a 10 48 85 ff 74 05 f6 07 01 [ 2325.022982] RIP [] rb_erase+0x14c/0x390 [ 2325.022982] RSP [ 2325.022982] CR2: 0000000000000000 [ 2325.022982] ---[ end trace 28c27ed011655e57 ]--- [ 228.064071] BUG: soft lockup - CPU#0 stuck for 22s! [nfsd:558] [ 228.064428] Modules linked in: ip6t_rpfilter ip6t_REJECT cfg80211 xt_conntrack rfkill ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw nfsd(OF) auth_rpcgss nfs_acl lockd snd_intel8x0 snd_ac97_codec ac97_bus joydev snd_pcm snd_timer e1000 sunrpc snd ppdev parport_pc serio_raw pcspkr i2c_piix4 microcode parport soundcore i2c_core ata_generic pata_acpi [ 228.064539] CPU: 0 PID: 558 Comm: nfsd Tainted: GF O 3.14.0-rc8+ #2 [ 228.064539] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 228.064539] task: ffff880076adec00 ti: ffff880074616000 task.ti: ffff880074616000 [ 228.064539] RIP: 0010:[] [] rb_next+0x27/0x50 [ 228.064539] RSP: 0018:ffff880074617de0 EFLAGS: 00000282 [ 228.064539] RAX: ffff880074478010 RBX: ffff88007446f860 RCX: 0000000000000014 [ 228.064539] RDX: ffff880074478010 RSI: 0000000000000000 RDI: ffff880074478010 [ 228.064539] RBP: ffff880074617de0 R08: 0000000000000000 R09: 0000000000000012 [ 228.064539] R10: 0000000000000001 R11: ffffffffffffffec R12: ffffea0001d11a00 [ 228.064539] R13: ffff88007f401400 R14: ffff88007446f800 R15: ffff880074617d50 [ 228.064539] FS: 0000000000000000(0000) GS:ffff88007f800000(0000) knlGS:0000000000000000 [ 228.064539] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 228.064539] CR2: 00007fe9ac6ec000 CR3: 000000007a5d6000 CR4: 00000000000006f0 [ 228.064539] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 228.064539] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 228.064539] Stack: [ 228.064539] ffff880074617e28 ffffffffa01ab7db ffff880074617df0 ffff880074617df0 [ 228.064539] ffff880079273000 ffffffff81cc26c0 ffffffff81cc26c0 0000000000000000 [ 228.064539] 0000000000000000 ffff880074617e48 ffffffffa01840b8 ffffffff81cc26c0 [ 228.064539] Call Trace: [ 228.064539] [] nfs4_state_shutdown_net+0x18b/0x220 [nfsd] [ 228.064539] [] nfsd_shutdown_net+0x38/0x70 [nfsd] [ 228.064539] [] nfsd_last_thread+0x4e/0x80 [nfsd] [ 228.064539] [] svc_shutdown_net+0x2b/0x30 [sunrpc] [ 228.064539] [] nfsd_destroy+0x5b/0x80 [nfsd] [ 228.064539] [] nfsd+0x103/0x130 [nfsd] [ 228.064539] [] ? nfsd_destroy+0x80/0x80 [nfsd] [ 228.064539] [] kthread+0xd2/0xf0 [ 228.064539] [] ? insert_kthread_work+0x40/0x40 [ 228.064539] [] ret_from_fork+0x7c/0xb0 [ 228.064539] [] ? insert_kthread_work+0x40/0x40 [ 228.064539] Code: 1f 44 00 00 55 48 8b 17 48 89 e5 48 39 d7 74 3b 48 8b 47 08 48 85 c0 75 0e eb 25 66 0f 1f 84 00 00 00 00 00 48 89 d0 48 8b 50 10 <48> 85 d2 75 f4 5d c3 66 90 48 3b 78 08 75 f6 48 8b 10 48 89 c7 Fixes: ac55fdc408039 (nfsd: move the confirmed and unconfirmed hlists...) Signed-off-by: Kinglong Mee Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 316ec843dec2..442509285ca9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4958,7 +4958,6 @@ nfs4_state_destroy_net(struct net *net) int i; struct nfs4_client *clp = NULL; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct rb_node *node, *tmp; for (i = 0; i < CLIENT_HASH_SIZE; i++) { while (!list_empty(&nn->conf_id_hashtbl[i])) { @@ -4967,13 +4966,11 @@ nfs4_state_destroy_net(struct net *net) } } - node = rb_first(&nn->unconf_name_tree); - while (node != NULL) { - tmp = node; - node = rb_next(tmp); - clp = rb_entry(tmp, struct nfs4_client, cl_namenode); - rb_erase(tmp, &nn->unconf_name_tree); - destroy_client(clp); + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + while (!list_empty(&nn->unconf_id_hashtbl[i])) { + clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); + destroy_client(clp); + } } kfree(nn->sessionid_hashtbl); -- cgit v1.2.3 From da05ff98dcc282ca0eebd849bc87594d6c28b506 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 15 Apr 2014 08:51:48 -0400 Subject: nfsd: set timeparms.to_maxval in setup_callback_client commit 3758cf7e14b753838fe754ede3862af10b35fdac upstream. ...otherwise the logic in the timeout handling doesn't work correctly. Spotted-by: Trond Myklebust Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4callback.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 7f05cd140de3..3eaa6e30a2dc 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -637,9 +637,11 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) { + int maxtime = max_cb_time(clp->net); struct rpc_timeout timeparms = { - .to_initval = max_cb_time(clp->net), + .to_initval = maxtime, .to_retries = 0, + .to_maxval = maxtime, }; struct rpc_create_args args = { .net = clp->net, -- cgit v1.2.3 From 1d06be05aadf7f66d8415f8e803a28495082dcfe Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 2 Apr 2014 14:40:26 -0400 Subject: reiserfs: fix race in readdir commit 01d8885785a60ae8f4c37b0ed75bdc96d0fc6a44 upstream. jdm-20004 reiserfs_delete_xattrs: Couldn't delete all xattrs (-2) The -ENOENT is due to readdir calling dir_emit on the same entry twice. If the dir_emit callback sleeps and the tree is changed underneath us, we won't be able to trust deh_offset(deh) anymore. We need to save next_pos before we might sleep so we can find the next entry. Signed-off-by: Jeff Mahoney Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/dir.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 6c2d136561cb..2b96b59f75da 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -128,6 +128,7 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, char *d_name; off_t d_off; ino_t d_ino; + loff_t cur_pos = deh_offset(deh); if (!de_visible(deh)) /* it is hidden entry */ @@ -200,8 +201,9 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, if (local_buf != small_buf) { kfree(local_buf); } - // next entry should be looked for with such offset - next_pos = deh_offset(deh) + 1; + + /* deh_offset(deh) may be invalid now. */ + next_pos = cur_pos + 1; if (item_moved(&tmp_ih, &path_to_entry)) { set_cpu_key_k_offset(&pos_key, -- cgit v1.2.3 From 261985c4398c6658e057842ba6c960c74f796d00 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 3 Apr 2014 14:46:49 -0700 Subject: ocfs2: dlm: fix lock migration crash commit 34aa8dac482f1358d59110d5e3a12f4351f6acaa upstream. This issue was introduced by commit 800deef3f6f8 ("ocfs2: use list_for_each_entry where benefical") in 2007 where it replaced list_for_each with list_for_each_entry. The variable "lock" will point to invalid data if "tmpq" list is empty and a panic will be triggered due to this. Sunil advised reverting it back, but the old version was also not right. At the end of the outer for loop, that list_for_each_entry will also set "lock" to an invalid data, then in the next loop, if the "tmpq" list is empty, "lock" will be an stale invalid data and cause the panic. So reverting the list_for_each back and reset "lock" to NULL to fix this issue. Another concern is that this seemes can not happen because the "tmpq" list should not be empty. Let me describe how. old lock resource owner(node 1): migratation target(node 2): image there's lockres with a EX lock from node 2 in granted list, a NR lock from node x with convert_type EX in converting list. dlm_empty_lockres() { dlm_pick_migration_target() { pick node 2 as target as its lock is the first one in granted list. } dlm_migrate_lockres() { dlm_mark_lockres_migrating() { res->state |= DLM_LOCK_RES_BLOCK_DIRTY; wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res)); //after the above code, we can not dirty lockres any more, // so dlm_thread shuffle list will not run downconvert lock from EX to NR upconvert lock from NR to EX <<< migration may schedule out here, then <<< node 2 send down convert request to convert type from EX to <<< NR, then send up convert request to convert type from NR to <<< EX, at this time, lockres granted list is empty, and two locks <<< in the converting list, node x up convert lock followed by <<< node 2 up convert lock. // will set lockres RES_MIGRATING flag, the following // lock/unlock can not run dlm_lockres_release_ast(dlm, res); } dlm_send_one_lockres() dlm_process_recovery_data() for (i=0; inum_locks; i++) if (ml->node == dlm->node_num) for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { list_for_each_entry(lock, tmpq, list) if (lock) break; <<< lock is invalid as grant list is empty. } if (lock->ml.node != ml->node) BUG() >>> crash here } I see the above locks status from a vmcore of our internal bug. Signed-off-by: Junxiao Bi Reviewed-by: Wengang Wang Cc: Sunil Mushran Reviewed-by: Srinivas Eeda Cc: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dlm/dlmrecovery.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index e68588e6b1e8..5abbc73d1a77 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1751,13 +1751,13 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, struct dlm_migratable_lockres *mres) { struct dlm_migratable_lock *ml; - struct list_head *queue; + struct list_head *queue, *iter; struct list_head *tmpq = NULL; struct dlm_lock *newlock = NULL; struct dlm_lockstatus *lksb = NULL; int ret = 0; int i, j, bad; - struct dlm_lock *lock = NULL; + struct dlm_lock *lock; u8 from = O2NM_MAX_NODES; unsigned int added = 0; __be64 c; @@ -1792,14 +1792,16 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, /* MIGRATION ONLY! */ BUG_ON(!(mres->flags & DLM_MRES_MIGRATION)); + lock = NULL; spin_lock(&res->spinlock); for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { tmpq = dlm_list_idx_to_ptr(res, j); - list_for_each_entry(lock, tmpq, list) { - if (lock->ml.cookie != ml->cookie) - lock = NULL; - else + list_for_each(iter, tmpq) { + lock = list_entry(iter, + struct dlm_lock, list); + if (lock->ml.cookie == ml->cookie) break; + lock = NULL; } if (lock) break; -- cgit v1.2.3 From a093bceda6927de680e1c16793c5dfc4df3d236f Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 3 Apr 2014 14:46:51 -0700 Subject: ocfs2: dlm: fix recovery hung commit ded2cf71419b9353060e633b59e446c42a6a2a09 upstream. There is a race window in dlm_do_recovery() between dlm_remaster_locks() and dlm_reset_recovery() when the recovery master nearly finish the recovery process for a dead node. After the master sends FINALIZE_RECO message in dlm_remaster_locks(), another node may become the recovery master for another dead node, and then send the BEGIN_RECO message to all the nodes included the old master, in the handler of this message dlm_begin_reco_handler() of old master, dlm->reco.dead_node and dlm->reco.new_master will be set to the second dead node and the new master, then in dlm_reset_recovery(), these two variables will be reset to default value. This will cause new recovery master can not finish the recovery process and hung, at last the whole cluster will hung for recovery. old recovery master: new recovery master: dlm_remaster_locks() become recovery master for another dead node. dlm_send_begin_reco_message() dlm_begin_reco_handler() { if (dlm->reco.state & DLM_RECO_STATE_FINALIZE) { return -EAGAIN; } dlm_set_reco_master(dlm, br->node_idx); dlm_set_reco_dead_node(dlm, br->dead_node); } dlm_reset_recovery() { dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM); dlm_set_reco_master(dlm, O2NM_INVALID_NODE_NUM); } will hang in dlm_remaster_locks() for request dlm locks info Before send FINALIZE_RECO message, recovery master should set DLM_RECO_STATE_FINALIZE for itself and clear it after the recovery done, this can break the race windows as the BEGIN_RECO messages will not be handled before DLM_RECO_STATE_FINALIZE flag is cleared. A similar race may happen between new recovery master and normal node which is in dlm_finalize_reco_handler(), also fix it. Signed-off-by: Junxiao Bi Reviewed-by: Srinivas Eeda Reviewed-by: Wengang Wang Cc: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dlm/dlmrecovery.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 5abbc73d1a77..9bd981cd3142 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -540,7 +540,10 @@ master_here: /* success! see if any other nodes need recovery */ mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n", dlm->name, dlm->reco.dead_node, dlm->node_num); - dlm_reset_recovery(dlm); + spin_lock(&dlm->spinlock); + __dlm_reset_recovery(dlm); + dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE; + spin_unlock(&dlm->spinlock); } dlm_end_recovery(dlm); @@ -698,6 +701,14 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) if (all_nodes_done) { int ret; + /* Set this flag on recovery master to avoid + * a new recovery for another dead node start + * before the recovery is not done. That may + * cause recovery hung.*/ + spin_lock(&dlm->spinlock); + dlm->reco.state |= DLM_RECO_STATE_FINALIZE; + spin_unlock(&dlm->spinlock); + /* all nodes are now in DLM_RECO_NODE_DATA_DONE state * just send a finalize message to everyone and * clean up */ @@ -2869,8 +2880,8 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data, BUG(); } dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE; + __dlm_reset_recovery(dlm); spin_unlock(&dlm->spinlock); - dlm_reset_recovery(dlm); dlm_kick_recovery_thread(dlm); break; default: -- cgit v1.2.3 From 5da43e877ea4e51b1096a8973ff6e7a03554fcb0 Mon Sep 17 00:00:00 2001 From: alex chen Date: Thu, 3 Apr 2014 14:47:05 -0700 Subject: ocfs2: do not put bh when buffer_uptodate failed commit f7cf4f5bfe073ad792ab49c04f247626b3e38db6 upstream. Do not put bh when buffer_uptodate failed in ocfs2_write_block and ocfs2_write_super_or_backup, because it will put bh in b_end_io. Otherwise it will hit a warning "VFS: brelse: Trying to free free buffer". Signed-off-by: Alex Chen Reviewed-by: Joseph Qi Reviewed-by: Srinivas Eeda Cc: Mark Fasheh Acked-by: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/buffer_head_io.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 5d18ad10c27f..4f66e007dae1 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -90,7 +90,6 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, * information for this bh as it's not marked locally * uptodate. */ ret = -EIO; - put_bh(bh); mlog_errno(ret); } @@ -420,7 +419,6 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, if (!buffer_uptodate(bh)) { ret = -EIO; - put_bh(bh); mlog_errno(ret); } -- cgit v1.2.3 From f78e2d2db6082212ca85a25e9d51d0bd126cb0a8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 7 Apr 2014 10:54:21 -0400 Subject: ext4: fix jbd2 warning under heavy xattr load commit ec4cb1aa2b7bae18dd8164f2e9c7c51abcf61280 upstream. When heavily exercising xattr code the assertion that jbd2_journal_dirty_metadata() shouldn't return error was triggered: WARNING: at /srv/autobuild-ceph/gitbuilder.git/build/fs/jbd2/transaction.c:1237 jbd2_journal_dirty_metadata+0x1ba/0x260() CPU: 0 PID: 8877 Comm: ceph-osd Tainted: G W 3.10.0-ceph-00049-g68d04c9 #1 Hardware name: Dell Inc. PowerEdge R410/01V648, BIOS 1.6.3 02/07/2011 ffffffff81a1d3c8 ffff880214469928 ffffffff816311b0 ffff880214469968 ffffffff8103fae0 ffff880214469958 ffff880170a9dc30 ffff8802240fbe80 0000000000000000 ffff88020b366000 ffff8802256e7510 ffff880214469978 Call Trace: [] dump_stack+0x19/0x1b [] warn_slowpath_common+0x70/0xa0 [] warn_slowpath_null+0x1a/0x20 [] jbd2_journal_dirty_metadata+0x1ba/0x260 [] __ext4_handle_dirty_metadata+0xa3/0x140 [] ext4_xattr_release_block+0x103/0x1f0 [] ext4_xattr_block_set+0x1e0/0x910 [] ext4_xattr_set_handle+0x38b/0x4a0 [] ? trace_hardirqs_on+0xd/0x10 [] ext4_xattr_set+0xc2/0x140 [] ext4_xattr_user_set+0x47/0x50 [] generic_setxattr+0x6e/0x90 [] __vfs_setxattr_noperm+0x7b/0x1c0 [] vfs_setxattr+0xc4/0xd0 [] setxattr+0x13e/0x1e0 [] ? __sb_start_write+0xe7/0x1b0 [] ? mnt_want_write_file+0x28/0x60 [] ? fget_light+0x3c/0x130 [] ? mnt_want_write_file+0x28/0x60 [] ? __mnt_want_write+0x58/0x70 [] SyS_fsetxattr+0xbe/0x100 [] system_call_fastpath+0x16/0x1b The reason for the warning is that buffer_head passed into jbd2_journal_dirty_metadata() didn't have journal_head attached. This is caused by the following race of two ext4_xattr_release_block() calls: CPU1 CPU2 ext4_xattr_release_block() ext4_xattr_release_block() lock_buffer(bh); /* False */ if (BHDR(bh)->h_refcount == cpu_to_le32(1)) } else { le32_add_cpu(&BHDR(bh)->h_refcount, -1); unlock_buffer(bh); lock_buffer(bh); /* True */ if (BHDR(bh)->h_refcount == cpu_to_le32(1)) get_bh(bh); ext4_free_blocks() ... jbd2_journal_forget() jbd2_journal_unfile_buffer() -> JH is gone error = ext4_handle_dirty_xattr_block(handle, inode, bh); -> triggers the warning We fix the problem by moving ext4_handle_dirty_xattr_block() under the buffer lock. Sadly this cannot be done in nojournal mode as that function can call sync_dirty_buffer() which would deadlock. Luckily in nojournal mode the race is harmless (we only dirty already freed buffer) and thus for nojournal mode we leave the dirtying outside of the buffer lock. Reported-by: Sage Weil Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 1423c4816a47..298e9c8da364 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -517,8 +517,8 @@ static void ext4_xattr_update_super_block(handle_t *handle, } /* - * Release the xattr block BH: If the reference count is > 1, decrement - * it; otherwise free the block. + * Release the xattr block BH: If the reference count is > 1, decrement it; + * otherwise free the block. */ static void ext4_xattr_release_block(handle_t *handle, struct inode *inode, @@ -538,16 +538,31 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, if (ce) mb_cache_entry_free(ce); get_bh(bh); + unlock_buffer(bh); ext4_free_blocks(handle, inode, bh, 0, 1, EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); - unlock_buffer(bh); } else { le32_add_cpu(&BHDR(bh)->h_refcount, -1); if (ce) mb_cache_entry_release(ce); + /* + * Beware of this ugliness: Releasing of xattr block references + * from different inodes can race and so we have to protect + * from a race where someone else frees the block (and releases + * its journal_head) before we are done dirtying the buffer. In + * nojournal mode this race is harmless and we actually cannot + * call ext4_handle_dirty_xattr_block() with locked buffer as + * that function can call sync_dirty_buffer() so for that case + * we handle the dirtying after unlocking the buffer. + */ + if (ext4_handle_valid(handle)) + error = ext4_handle_dirty_xattr_block(handle, inode, + bh); unlock_buffer(bh); - error = ext4_handle_dirty_xattr_block(handle, inode, bh); + if (!ext4_handle_valid(handle)) + error = ext4_handle_dirty_xattr_block(handle, inode, + bh); if (IS_SYNC(inode)) ext4_handle_sync(handle); dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1)); -- cgit v1.2.3 From fcc742c294c9c189e7040a3de8528ac6f8ee13ef Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 12 Apr 2014 12:45:25 -0400 Subject: ext4: use i_size_read in ext4_unaligned_aio() commit 6e6358fc3c3c862bfe9a5bc029d3f8ce43dc9765 upstream. We haven't taken i_mutex yet, so we need to use i_size_read(). Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/file.c b/fs/ext4/file.c index b19f0a457f32..4635788e14bf 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -82,7 +82,7 @@ ext4_unaligned_aio(struct inode *inode, const struct iovec *iov, size_t count = iov_length(iov, nr_segs); loff_t final_size = pos + count; - if (pos >= inode->i_size) + if (pos >= i_size_read(inode)) return 0; if ((pos & blockmask) || (final_size & blockmask)) -- cgit v1.2.3 From 8771da18d27db1295c4dcc7bd54a71d441c1731b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 15 Apr 2014 08:44:12 -0400 Subject: locks: allow __break_lease to sleep even when break_time is 0 commit 4991a628a789dc5954e98e79476d9808812292ec upstream. A fl->fl_break_time of 0 has a special meaning to the lease break code that basically means "never break the lease". knfsd uses this to ensure that leases don't disappear out from under it. Unfortunately, the code in __break_lease can end up passing this value to wait_event_interruptible as a timeout, which prevents it from going to sleep at all. This causes __break_lease to spin in a tight loop and causes soft lockups. Fix this by ensuring that we pass a minimum value of 1 as a timeout instead. Cc: J. Bruce Fields Reported-by: Terry Barnaby Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/locks.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index cb424a4fed71..0274c953b07d 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1243,11 +1243,10 @@ int __break_lease(struct inode *inode, unsigned int mode) restart: break_time = flock->fl_break_time; - if (break_time != 0) { + if (break_time != 0) break_time -= jiffies; - if (break_time == 0) - break_time++; - } + if (break_time == 0) + break_time++; locks_insert_block(flock, new_fl); unlock_flocks(); error = wait_event_interruptible_timeout(new_fl->fl_wait, -- cgit v1.2.3 From 72a6e594497032bd911bd187a88fae4b4473abb3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 25 Mar 2014 11:55:26 -0700 Subject: lockd: ensure we tear down any live sockets when socket creation fails during lockd_up commit 679b033df48422191c4cac52b610d9980e019f9b upstream. We had a Fedora ABRT report with a stack trace like this: kernel BUG at net/sunrpc/svc.c:550! invalid opcode: 0000 [#1] SMP [...] CPU: 2 PID: 913 Comm: rpc.nfsd Not tainted 3.13.6-200.fc20.x86_64 #1 Hardware name: Hewlett-Packard HP ProBook 4740s/1846, BIOS 68IRR Ver. F.40 01/29/2013 task: ffff880146b00000 ti: ffff88003f9b8000 task.ti: ffff88003f9b8000 RIP: 0010:[] [] svc_destroy+0x128/0x130 [sunrpc] RSP: 0018:ffff88003f9b9de0 EFLAGS: 00010206 RAX: ffff88003f829628 RBX: ffff88003f829600 RCX: 00000000000041ee RDX: 0000000000000000 RSI: 0000000000000286 RDI: 0000000000000286 RBP: ffff88003f9b9de8 R08: 0000000000017360 R09: ffff88014fa97360 R10: ffffffff8114ce57 R11: ffffea00051c9c00 R12: ffff88003f829600 R13: 00000000ffffff9e R14: ffffffff81cc7cc0 R15: 0000000000000000 FS: 00007f4fde284840(0000) GS:ffff88014fa80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f4fdf5192f8 CR3: 00000000a569a000 CR4: 00000000001407e0 Stack: ffff88003f792300 ffff88003f9b9e18 ffffffffa02de02a 0000000000000000 ffffffff81cc7cc0 ffff88003f9cb000 0000000000000008 ffff88003f9b9e60 ffffffffa033bb35 ffffffff8131c86c ffff88003f9cb000 ffff8800a5715008 Call Trace: [] lockd_up+0xaa/0x330 [lockd] [] nfsd_svc+0x1b5/0x2f0 [nfsd] [] ? simple_strtoull+0x2c/0x50 [] ? write_pool_threads+0x280/0x280 [nfsd] [] write_threads+0x8b/0xf0 [nfsd] [] ? __get_free_pages+0x14/0x50 [] ? get_zeroed_page+0x16/0x20 [] ? simple_transaction_get+0xb1/0xd0 [] nfsctl_transaction_write+0x48/0x80 [nfsd] [] vfs_write+0xb4/0x1f0 [] ? putname+0x29/0x40 [] SyS_write+0x49/0xa0 [] ? __audit_syscall_exit+0x1f6/0x2a0 [] system_call_fastpath+0x16/0x1b Code: 31 c0 e8 82 db 37 e1 e9 2a ff ff ff 48 8b 07 8b 57 14 48 c7 c7 d5 c6 31 a0 48 8b 70 20 31 c0 e8 65 db 37 e1 e9 f4 fe ff ff 0f 0b <0f> 0b 66 0f 1f 44 00 00 0f 1f 44 00 00 55 48 89 e5 41 56 41 55 RIP [] svc_destroy+0x128/0x130 [sunrpc] RSP Evidently, we created some lockd sockets and then failed to create others. make_socks then returned an error and we tried to tear down the svc, but svc->sv_permsocks was not empty so we ended up tripping over the BUG() in svc_destroy(). Fix this by ensuring that we tear down any live sockets we created when socket creation is going to return an error. Fixes: 786185b5f8abefa (SUNRPC: move per-net operations from...) Reported-by: Raphos Signed-off-by: Jeff Layton Reviewed-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/lockd/svc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index a2aa97d45670..d56a9904e52a 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -235,6 +235,7 @@ out_err: if (warned++ == 0) printk(KERN_WARNING "lockd_up: makesock failed, error=%d\n", err); + svc_shutdown_net(serv, net); return err; } -- cgit v1.2.3