From d1d274cd1ce2eb979c66233a36a7a92abfc0f18f Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 21 Nov 2011 17:31:02 -0600 Subject: eCryptfs: Prevent file create race condition commit b59db43ad4434519feb338eacb01d77eb50825c5 upstream. The file creation path prematurely called d_instantiate() and unlock_new_inode() before the eCryptfs inode info was fully allocated and initialized and before the eCryptfs metadata was written to the lower file. This could result in race conditions in subsequent file and inode operations leading to unexpected error conditions or a null pointer dereference while attempting to use the unallocated memory. https://launchpad.net/bugs/813146 Signed-off-by: Tyler Hicks Signed-off-by: Greg Kroah-Hartman --- fs/ecryptfs/crypto.c | 22 +++++++++--------- fs/ecryptfs/ecryptfs_kernel.h | 5 +++-- fs/ecryptfs/inode.c | 52 ++++++++++++++++++++++++++----------------- 3 files changed, 46 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 58609bde3b9f..203a1fdff666 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -967,7 +967,7 @@ static void ecryptfs_set_default_crypt_stat_vals( /** * ecryptfs_new_file_context - * @ecryptfs_dentry: The eCryptfs dentry + * @ecryptfs_inode: The eCryptfs inode * * If the crypto context for the file has not yet been established, * this is where we do that. Establishing a new crypto context @@ -984,13 +984,13 @@ static void ecryptfs_set_default_crypt_stat_vals( * * Returns zero on success; non-zero otherwise */ -int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry) +int ecryptfs_new_file_context(struct inode *ecryptfs_inode) { struct ecryptfs_crypt_stat *crypt_stat = - &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; + &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; struct ecryptfs_mount_crypt_stat *mount_crypt_stat = &ecryptfs_superblock_to_private( - ecryptfs_dentry->d_sb)->mount_crypt_stat; + ecryptfs_inode->i_sb)->mount_crypt_stat; int cipher_name_len; int rc = 0; @@ -1299,12 +1299,12 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t max, } static int -ecryptfs_write_metadata_to_contents(struct dentry *ecryptfs_dentry, +ecryptfs_write_metadata_to_contents(struct inode *ecryptfs_inode, char *virt, size_t virt_len) { int rc; - rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, virt, + rc = ecryptfs_write_lower(ecryptfs_inode, virt, 0, virt_len); if (rc < 0) printk(KERN_ERR "%s: Error attempting to write header " @@ -1338,7 +1338,8 @@ static unsigned long ecryptfs_get_zeroed_pages(gfp_t gfp_mask, /** * ecryptfs_write_metadata - * @ecryptfs_dentry: The eCryptfs dentry + * @ecryptfs_dentry: The eCryptfs dentry, which should be negative + * @ecryptfs_inode: The newly created eCryptfs inode * * Write the file headers out. This will likely involve a userspace * callout, in which the session key is encrypted with one or more @@ -1348,10 +1349,11 @@ static unsigned long ecryptfs_get_zeroed_pages(gfp_t gfp_mask, * * Returns zero on success; non-zero on error */ -int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) +int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry, + struct inode *ecryptfs_inode) { struct ecryptfs_crypt_stat *crypt_stat = - &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; + &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; unsigned int order; char *virt; size_t virt_len; @@ -1391,7 +1393,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) rc = ecryptfs_write_metadata_to_xattr(ecryptfs_dentry, virt, size); else - rc = ecryptfs_write_metadata_to_contents(ecryptfs_dentry, virt, + rc = ecryptfs_write_metadata_to_contents(ecryptfs_inode, virt, virt_len); if (rc) { printk(KERN_ERR "%s: Error writing metadata out to lower file; " diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index b36c5572b3f3..9ce1e92c7d93 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -584,9 +584,10 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat); int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode); int ecryptfs_encrypt_page(struct page *page); int ecryptfs_decrypt_page(struct page *page); -int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry); +int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry, + struct inode *ecryptfs_inode); int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry); -int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry); +int ecryptfs_new_file_context(struct inode *ecryptfs_inode); void ecryptfs_write_crypt_stat_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat, size_t *written); diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 11f8582d7218..528da01fec2b 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -172,22 +172,23 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode, * it. It will also update the eCryptfs directory inode to mimic the * stat of the lower directory inode. * - * Returns zero on success; non-zero on error condition + * Returns the new eCryptfs inode on success; an ERR_PTR on error condition */ -static int +static struct inode * ecryptfs_do_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, int mode) { int rc; struct dentry *lower_dentry; struct dentry *lower_dir_dentry; + struct inode *inode; lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); lower_dir_dentry = lock_parent(lower_dentry); if (IS_ERR(lower_dir_dentry)) { ecryptfs_printk(KERN_ERR, "Error locking directory of " "dentry\n"); - rc = PTR_ERR(lower_dir_dentry); + inode = ERR_CAST(lower_dir_dentry); goto out; } rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode, @@ -195,20 +196,19 @@ ecryptfs_do_create(struct inode *directory_inode, if (rc) { printk(KERN_ERR "%s: Failure to create dentry in lower fs; " "rc = [%d]\n", __func__, rc); + inode = ERR_PTR(rc); goto out_lock; } - rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, - directory_inode->i_sb); - if (rc) { - ecryptfs_printk(KERN_ERR, "Failure in ecryptfs_interpose\n"); + inode = __ecryptfs_get_inode(lower_dentry->d_inode, + directory_inode->i_sb); + if (IS_ERR(inode)) goto out_lock; - } fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode); fsstack_copy_inode_size(directory_inode, lower_dir_dentry->d_inode); out_lock: unlock_dir(lower_dir_dentry); out: - return rc; + return inode; } /** @@ -219,26 +219,26 @@ out: * * Returns zero on success */ -static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry) +static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry, + struct inode *ecryptfs_inode) { struct ecryptfs_crypt_stat *crypt_stat = - &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; + &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; int rc = 0; - if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { + if (S_ISDIR(ecryptfs_inode->i_mode)) { ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); goto out; } ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n"); - rc = ecryptfs_new_file_context(ecryptfs_dentry); + rc = ecryptfs_new_file_context(ecryptfs_inode); if (rc) { ecryptfs_printk(KERN_ERR, "Error creating new file " "context; rc = [%d]\n", rc); goto out; } - rc = ecryptfs_get_lower_file(ecryptfs_dentry, - ecryptfs_dentry->d_inode); + rc = ecryptfs_get_lower_file(ecryptfs_dentry, ecryptfs_inode); if (rc) { printk(KERN_ERR "%s: Error attempting to initialize " "the lower file for the dentry with name " @@ -246,10 +246,10 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry) ecryptfs_dentry->d_name.name, rc); goto out; } - rc = ecryptfs_write_metadata(ecryptfs_dentry); + rc = ecryptfs_write_metadata(ecryptfs_dentry, ecryptfs_inode); if (rc) printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc); - ecryptfs_put_lower_file(ecryptfs_dentry->d_inode); + ecryptfs_put_lower_file(ecryptfs_inode); out: return rc; } @@ -269,18 +269,28 @@ static int ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, int mode, struct nameidata *nd) { + struct inode *ecryptfs_inode; int rc; - /* ecryptfs_do_create() calls ecryptfs_interpose() */ - rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode); - if (unlikely(rc)) { + ecryptfs_inode = ecryptfs_do_create(directory_inode, ecryptfs_dentry, + mode); + if (unlikely(IS_ERR(ecryptfs_inode))) { ecryptfs_printk(KERN_WARNING, "Failed to create file in" "lower filesystem\n"); + rc = PTR_ERR(ecryptfs_inode); goto out; } /* At this point, a file exists on "disk"; we need to make sure * that this on disk file is prepared to be an ecryptfs file */ - rc = ecryptfs_initialize_file(ecryptfs_dentry); + rc = ecryptfs_initialize_file(ecryptfs_dentry, ecryptfs_inode); + if (rc) { + drop_nlink(ecryptfs_inode); + unlock_new_inode(ecryptfs_inode); + iput(ecryptfs_inode); + goto out; + } + d_instantiate(ecryptfs_dentry, ecryptfs_inode); + unlock_new_inode(ecryptfs_inode); out: return rc; } -- cgit v1.2.3 From 19c8acbc4a4d444d72d256a7541b040cebddc78b Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 21 Nov 2011 17:31:29 -0600 Subject: eCryptfs: Flush file in vma close commit 32001d6fe9ac6b0423e674a3093aa56740849f3b upstream. Dirty pages weren't being written back when an mmap'ed eCryptfs file was closed before the mapping was unmapped. Since f_ops->flush() is not called by the munmap() path, the lower file was simply being released. This patch flushes the eCryptfs file in the vm_ops->close() path. https://launchpad.net/bugs/870326 Signed-off-by: Tyler Hicks Signed-off-by: Greg Kroah-Hartman --- fs/ecryptfs/file.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index c6ac98cf9baa..d3f95f941c47 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -139,6 +139,27 @@ out: return rc; } +static void ecryptfs_vma_close(struct vm_area_struct *vma) +{ + filemap_write_and_wait(vma->vm_file->f_mapping); +} + +static const struct vm_operations_struct ecryptfs_file_vm_ops = { + .close = ecryptfs_vma_close, + .fault = filemap_fault, +}; + +static int ecryptfs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + int rc; + + rc = generic_file_mmap(file, vma); + if (!rc) + vma->vm_ops = &ecryptfs_file_vm_ops; + + return rc; +} + struct kmem_cache *ecryptfs_file_info_cache; /** @@ -349,7 +370,7 @@ const struct file_operations ecryptfs_main_fops = { #ifdef CONFIG_COMPAT .compat_ioctl = ecryptfs_compat_ioctl, #endif - .mmap = generic_file_mmap, + .mmap = ecryptfs_file_mmap, .open = ecryptfs_open, .flush = ecryptfs_flush, .release = ecryptfs_release, -- cgit v1.2.3 From 722daca8514aacaf7ed1311f2cc8e518af675049 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Wed, 23 Nov 2011 11:31:24 -0600 Subject: eCryptfs: Extend array bounds for all filename chars commit 0f751e641a71157aa584c2a2e22fda52b52b8a56 upstream. From mhalcrow's original commit message: Characters with ASCII values greater than the size of filename_rev_map[] are valid filename characters. ecryptfs_decode_from_filename() will access kernel memory beyond that array, and ecryptfs_parse_tag_70_packet() will then decrypt those characters. The attacker, using the FNEK of the crafted file, can then re-encrypt the characters to reveal the kernel memory past the end of the filename_rev_map[] array. I expect low security impact since this array is statically allocated in the text area, and the amount of memory past the array that is accessible is limited by the largest possible ASCII filename character. This patch solves the issue reported by mhalcrow but with an implementation suggested by Linus to simply extend the length of filename_rev_map[] to 256. Characters greater than 0x7A are mapped to 0x00, which is how invalid characters less than 0x7A were previously being handled. Signed-off-by: Tyler Hicks Reported-by: Michael Halcrow Signed-off-by: Greg Kroah-Hartman --- fs/ecryptfs/crypto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 203a1fdff666..2a834255c75d 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1945,7 +1945,7 @@ static unsigned char *portable_filename_chars = ("-.0123456789ABCD" /* We could either offset on every reverse map or just pad some 0x00's * at the front here */ -static const unsigned char filename_rev_map[] = { +static const unsigned char filename_rev_map[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 15 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 23 */ @@ -1961,7 +1961,7 @@ static const unsigned char filename_rev_map[] = { 0x00, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, /* 103 */ 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, /* 111 */ 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, /* 119 */ - 0x3D, 0x3E, 0x3F + 0x3D, 0x3E, 0x3F /* 123 - 255 initialized to 0x00 */ }; /** -- cgit v1.2.3 From 01da30a885baa667801facc2a54c80346ab6c414 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Nov 2011 19:22:24 -0500 Subject: ext4: fix racy use-after-free in ext4_end_io_dio() commit 4c81f045c0bd2cbb78cc6446a4cd98038fe11a2e upstream. ext4_end_io_dio() queues io_end->work and then clears iocb->private; however, io_end->work calls aio_complete() which frees the iocb object. If that slab object gets reallocated, then ext4_end_io_dio() can end up clearing someone else's iocb->private, this use-after-free can cause a leak of a struct ext4_io_end_t structure. Detected and tested with slab poisoning. [ Note: Can also reproduce using 12 fio's against 12 file systems with the following configuration file: [global] direct=1 ioengine=libaio iodepth=1 bs=4k ba=4k size=128m [create] filename=${TESTDIR} rw=write -- tytso ] Google-Bug-Id: 5354697 Signed-off-by: Tejun Heo Signed-off-by: "Theodore Ts'o" Reported-by: Kent Overstreet Tested-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 89c47f4ee005..b644b9c164a7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2656,8 +2656,8 @@ out: spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); /* queue the work to convert unwritten extents to written */ - queue_work(wq, &io_end->work); iocb->private = NULL; + queue_work(wq, &io_end->work); /* XXX: probably should move into the real I/O completion handler */ inode_dio_done(inode); -- cgit v1.2.3 From 686da49e5aa50117d8d824c579c3fd9e0318fbc6 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 1 Dec 2011 17:27:39 -0600 Subject: xfs: don't serialise direct IO reads on page cache checks commit 0c38a2512df272b14ef4238b476a2e4f70da1479 upstream. There is no need to grab the i_mutex of the IO lock in exclusive mode if we don't need to invalidate the page cache. Taking these locks on every direct IO effective serialises them as taking the IO lock in exclusive mode has to wait for all shared holders to drop the lock. That only happens when IO is complete, so effective it prevents dispatch of concurrent direct IO reads to the same inode. Fix this by taking the IO lock shared to check the page cache state, and only then drop it and take the IO lock exclusively if there is work to be done. Hence for the normal direct IO case, no exclusive locking will occur. Signed-off-by: Dave Chinner Tested-by: Joern Engel Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder Cc: Ben Myers Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_file.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 7f7b42469ea7..8fd4a0708d30 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -317,7 +317,19 @@ xfs_file_aio_read( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if (unlikely(ioflags & IO_ISDIRECT)) { + /* + * Locking is a bit tricky here. If we take an exclusive lock + * for direct IO, we effectively serialise all new concurrent + * read IO to this file and block it behind IO that is currently in + * progress because IO in progress holds the IO lock shared. We only + * need to hold the lock exclusive to blow away the page cache, so + * only take lock exclusively if the page cache needs invalidation. + * This allows the normal direct IO case of no page cache pages to + * proceeed concurrently without serialisation. + */ + xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); + if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) { + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); if (inode->i_mapping->nrpages) { @@ -330,8 +342,7 @@ xfs_file_aio_read( } } xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); - } else - xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); + } trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); -- cgit v1.2.3 From 6a426248da83e544995fc8a494630285260ec4ee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Dec 2011 17:27:40 -0600 Subject: xfs: avoid direct I/O write vs buffered I/O race commit c58cb165bd44de8aaee9755a144136ae743be116 upstream. Currently a buffered reader or writer can add pages to the pagecache while we are waiting for the iolock in xfs_file_dio_aio_write. Prevent this by re-checking mapping->nrpages after we got the iolock, and if nessecary upgrade the lock to exclusive mode. To simplify this a bit only take the ilock inside of xfs_file_aio_write_checks. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder Cc: Ben Myers Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_file.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 8fd4a0708d30..b7e75c6ba094 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -677,6 +677,7 @@ xfs_file_aio_write_checks( xfs_fsize_t new_size; int error = 0; + xfs_rw_ilock(ip, XFS_ILOCK_EXCL); error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); if (error) { xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); @@ -768,14 +769,24 @@ xfs_file_dio_aio_write( *iolock = XFS_IOLOCK_EXCL; else *iolock = XFS_IOLOCK_SHARED; - xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); + xfs_rw_ilock(ip, *iolock); ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); if (ret) return ret; + /* + * Recheck if there are cached pages that need invalidate after we got + * the iolock to protect against other threads adding new pages while + * we were waiting for the iolock. + */ + if (mapping->nrpages && *iolock == XFS_IOLOCK_SHARED) { + xfs_rw_iunlock(ip, *iolock); + *iolock = XFS_IOLOCK_EXCL; + xfs_rw_ilock(ip, *iolock); + } + if (mapping->nrpages) { - WARN_ON(*iolock != XFS_IOLOCK_EXCL); ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); if (ret) @@ -820,7 +831,7 @@ xfs_file_buffered_aio_write( size_t count = ocount; *iolock = XFS_IOLOCK_EXCL; - xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); + xfs_rw_ilock(ip, *iolock); ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); if (ret) -- cgit v1.2.3 From b4dd4c13166d1aab0c2cd8033f1cc1211f6f1678 Mon Sep 17 00:00:00 2001 From: Mitsuo Hayasaka Date: Thu, 1 Dec 2011 17:27:41 -0600 Subject: xfs: Return -EIO when xfs_vn_getattr() failed commit ed32201e65e15f3e6955cb84cbb544b08f81e5a5 upstream. An attribute of inode can be fetched via xfs_vn_getattr() in XFS. Currently it returns EIO, not negative value, when it failed. As a result, the system call returns not negative value even though an error occured. The stat(2), ls and mv commands cannot handle this error and do not work correctly. This patch fixes this bug, and returns -EIO, not EIO when an error is detected in xfs_vn_getattr(). Signed-off-by: Mitsuo Hayasaka Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder Cc: Ben Myers Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_iops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 673704fab748..474920b4655d 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -465,7 +465,7 @@ xfs_vn_getattr( trace_xfs_getattr(ip); if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -XFS_ERROR(EIO); stat->size = XFS_ISIZE(ip); stat->dev = inode->i_sb->s_dev; -- cgit v1.2.3 From 7f9fae139e1b34b0ecb0ff8ab051c4497d68db2c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Dec 2011 17:27:42 -0600 Subject: xfs: fix buffer flushing during unmount commit 87c7bec7fc3377b3873eb3a0f4b603981ea16ebb upstream. The code to flush buffers in the umount code is a bit iffy: we first flush all delwri buffers out, but then might be able to queue up a new one when logging the sb counts. On a normal shutdown that one would get flushed out when doing the synchronous superblock write in xfs_unmountfs_writesb, but we skip that one if the filesystem has been shut down. Fix this by moving the delwri list flushing until just before unmounting the log, and while we're at it also remove the superflous delwri list and buffer lru flusing for the rt and log device that can never have cached or delwri buffers. Signed-off-by: Christoph Hellwig Reported-by: Amit Sahrawat Tested-by: Amit Sahrawat Signed-off-by: Alex Elder Cc: Ben Myers Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf.h | 1 - fs/xfs/xfs_mount.c | 29 ++++++++++------------------- 2 files changed, 10 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 620972b8094d..8e8b06b90b61 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -320,7 +320,6 @@ extern struct list_head *xfs_get_buftarg_list(void); #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) -#define xfs_binval(buftarg) xfs_flush_buftarg(buftarg, 1) #define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1) #endif /* __XFS_BUF_H__ */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 0081657ad985..d4d5775d6e23 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -44,9 +44,6 @@ #include "xfs_trace.h" -STATIC void xfs_unmountfs_wait(xfs_mount_t *); - - #ifdef HAVE_PERCPU_SB STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, int); @@ -1496,11 +1493,6 @@ xfs_unmountfs( */ xfs_log_force(mp, XFS_LOG_SYNC); - xfs_binval(mp->m_ddev_targp); - if (mp->m_rtdev_targp) { - xfs_binval(mp->m_rtdev_targp); - } - /* * Unreserve any blocks we have so that when we unmount we don't account * the reserved free space as used. This is really only necessary for @@ -1526,7 +1518,16 @@ xfs_unmountfs( xfs_warn(mp, "Unable to update superblock counters. " "Freespace may not be correct on next mount."); xfs_unmountfs_writesb(mp); - xfs_unmountfs_wait(mp); /* wait for async bufs */ + + /* + * Make sure all buffers have been flushed and completed before + * unmounting the log. + */ + error = xfs_flush_buftarg(mp->m_ddev_targp, 1); + if (error) + xfs_warn(mp, "%d busy buffers during unmount.", error); + xfs_wait_buftarg(mp->m_ddev_targp); + xfs_log_unmount_write(mp); xfs_log_unmount(mp); xfs_uuid_unmount(mp); @@ -1537,16 +1538,6 @@ xfs_unmountfs( xfs_free_perag(mp); } -STATIC void -xfs_unmountfs_wait(xfs_mount_t *mp) -{ - if (mp->m_logdev_targp != mp->m_ddev_targp) - xfs_wait_buftarg(mp->m_logdev_targp); - if (mp->m_rtdev_targp) - xfs_wait_buftarg(mp->m_rtdev_targp); - xfs_wait_buftarg(mp->m_ddev_targp); -} - int xfs_fs_writable(xfs_mount_t *mp) { -- cgit v1.2.3 From c38aeb8cd119fddc37d7cd648d6c43e782711247 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 1 Dec 2011 17:27:43 -0600 Subject: xfs: Fix possible memory corruption in xfs_readlink commit b52a360b2aa1c59ba9970fb0f52bbb093fcc7a24 upstream. Fixes a possible memory corruption when the link is larger than MAXPATHLEN and XFS_DEBUG is not enabled. This also remove the S_ISLNK assert, since the inode mode is checked previously in xfs_readlink_by_handle() and via VFS. Updated to address concerns raised by Ben Hutchings about the loose attention paid to 32- vs 64-bit values, and the lack of handling a potentially negative pathlen value: - Changed type of "pathlen" to be xfs_fsize_t, to match that of ip->i_d.di_size - Added checking for a negative pathlen to the too-long pathlen test, and generalized the message that gets reported in that case to reflect the change As a result, if a negative pathlen were encountered, this function would return EFSCORRUPTED (and would fail an assertion for a debug build)--just as would a too-long pathlen. Signed-off-by: Alex Elder Signed-off-by: Carlos Maiolino Reviewed-by: Christoph Hellwig Cc: Ben Myers Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_vnodeops.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 51fc429527bc..b9e28730bbd6 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -113,7 +113,7 @@ xfs_readlink( char *link) { xfs_mount_t *mp = ip->i_mount; - int pathlen; + xfs_fsize_t pathlen; int error = 0; trace_xfs_readlink(ip); @@ -123,13 +123,19 @@ xfs_readlink( xfs_ilock(ip, XFS_ILOCK_SHARED); - ASSERT(S_ISLNK(ip->i_d.di_mode)); - ASSERT(ip->i_d.di_size <= MAXPATHLEN); - pathlen = ip->i_d.di_size; if (!pathlen) goto out; + if (pathlen < 0 || pathlen > MAXPATHLEN) { + xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)", + __func__, (unsigned long long) ip->i_ino, + (long long) pathlen); + ASSERT(0); + return XFS_ERROR(EFSCORRUPTED); + } + + if (ip->i_df.if_flags & XFS_IFINLINE) { memcpy(link, ip->i_df.if_u1.if_data, pathlen); link[pathlen] = '\0'; -- cgit v1.2.3 From 5635019b754fa0ccfc2275369b05142a8a2dbde4 Mon Sep 17 00:00:00 2001 From: Mitsuo Hayasaka Date: Thu, 1 Dec 2011 17:27:44 -0600 Subject: xfs: use doalloc flag in xfs_qm_dqattach_one() commit db3e74b582915d66e10b0c73a62763418f54c340 upstream. The doalloc arg in xfs_qm_dqattach_one() is a flag that indicates whether a new area to handle quota information will be allocated if needed. Originally, it was passed to xfs_qm_dqget(), but has been removed by the following commit (probably by mistake): commit 8e9b6e7fa4544ea8a0e030c8987b918509c8ff47 Author: Christoph Hellwig Date: Sun Feb 8 21:51:42 2009 +0100 xfs: remove the unused XFS_QMOPT_DQLOCK flag As the result, xfs_qm_dqget() called from xfs_qm_dqattach_one() never allocates the new area even if it is needed. This patch gives the doalloc arg to xfs_qm_dqget() in xfs_qm_dqattach_one() to fix this problem. Signed-off-by: Mitsuo Hayasaka Cc: Alex Elder Cc: Christoph Hellwig Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_qm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 9a0aa76facdf..95ba6dc885a7 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -674,7 +674,8 @@ xfs_qm_dqattach_one( * disk and we didn't ask it to allocate; * ESRCH if quotas got turned off suddenly. */ - error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp); + error = xfs_qm_dqget(ip->i_mount, ip, id, type, + doalloc | XFS_QMOPT_DOWARN, &dqp); if (error) return error; -- cgit v1.2.3