From 1c1266bb916e6a6b362d3be95f2cc7f3c41277a6 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Wed, 12 Jan 2011 16:53:27 -0800 Subject: ceph: fix getattr on directory when using norbytes The norbytes mount option was broken, and when doing getattr on a directory it return the rbytes instead of the number of entities. This commit fixes it. Signed-off-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/inode.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e791fa34b23d..50001de66c69 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -701,10 +701,6 @@ static int fill_inode(struct inode *inode, ci->i_ceph_flags |= CEPH_I_COMPLETE; ci->i_max_offset = 2; } - - /* it may be better to set st_size in getattr instead? */ - if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) - inode->i_size = ci->i_rbytes; break; default: pr_err("fill_inode %llx.%llx BAD mode 0%o\n", @@ -1805,7 +1801,11 @@ int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, else stat->dev = 0; if (S_ISDIR(inode->i_mode)) { - stat->size = ci->i_rbytes; + if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), + RBYTES)) + stat->size = ci->i_rbytes; + else + stat->size = ci->i_files + ci->i_subdirs; stat->blocks = 0; stat->blksize = 65536; } -- cgit v1.2.3 From 17db143fc091238c43ab9f373974ca2224a4c3f8 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 13 Jan 2011 15:27:29 -0800 Subject: ceph: fix xattr rbtree search Fix xattr name comparison in rbtree search for strings that share a prefix. The *name argument is null terminated, but the xattr name is not, so we need to use strncmp, but that means adjusting for the case where name is a prefix of xattr->name. The corresponding case in __set_xattr() already handles this properly (although in that case *name is also not null terminated). Reported-by: Sergiy Kibrik Signed-off-by: Sage Weil --- fs/ceph/xattr.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 6e12a6ba5f79..8c9eba6ef9df 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -219,6 +219,7 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci, struct rb_node **p; struct rb_node *parent = NULL; struct ceph_inode_xattr *xattr = NULL; + int name_len = strlen(name); int c; p = &ci->i_xattrs.index.rb_node; @@ -226,6 +227,8 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci, parent = *p; xattr = rb_entry(parent, struct ceph_inode_xattr, node); c = strncmp(name, xattr->name, xattr->name_len); + if (c == 0 && name_len > xattr->name_len) + c = 1; if (c < 0) p = &(*p)->rb_left; else if (c > 0) -- cgit v1.2.3 From 50aac4fec503960380ab594a93a6fbfdf3f8915f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 18 Jan 2011 07:59:40 -0800 Subject: ceph: fix cap_wanted_delay_{min,max} mount option initialization These were initialized to 0 instead of the default, fallout from the RBD refactor in 3d14c5d2b6e15c21d8e5467dc62d33127c23a644. Signed-off-by: Sage Weil --- fs/ceph/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index bf6f0f34082a..9c5085465a63 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -290,6 +290,8 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT; fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); + fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; + fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; -- cgit v1.2.3 From 24be0c481067560b11441e794e27f166a3568863 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 18 Jan 2011 08:48:06 -0800 Subject: ceph: fix erroneous cap flush to non-auth mds The int flushing is global and not clear on each iteration of the loop, which can cause a second flush of caps to any MDSs with ids greater than the auth. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 60d27bc9eb83..f654c7e933ac 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1658,6 +1658,8 @@ ack: if (cap == ci->i_auth_cap && ci->i_dirty_caps) flushing = __mark_caps_flushing(inode, session); + else + flushing = 0; mds = cap->mds; /* remember mds, so we don't repeat */ sent++; -- cgit v1.2.3 From 088b3f5e9ee2649f5cfc2f08d8ce654e3eeba310 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 18 Jan 2011 08:56:01 -0800 Subject: ceph: fix flushing of caps vs cap import If we are mid-flush and a cap is migrated to another node, we need to resend the cap flush message to the new MDS, and do so with the original flush_seq to avoid leaking across a sync boundary. Previously we didn't redo the flush (we only flushed newly dirty data), which would cause a later sync to hang forever. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index f654c7e933ac..7def3f5903dd 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1560,9 +1560,10 @@ retry_locked: /* NOTE: no side-effects allowed, until we take s_mutex */ revoking = cap->implemented & ~cap->issued; - if (revoking) - dout(" mds%d revoking %s\n", cap->mds, - ceph_cap_string(revoking)); + dout(" mds%d cap %p issued %s implemented %s revoking %s\n", + cap->mds, cap, ceph_cap_string(cap->issued), + ceph_cap_string(cap->implemented), + ceph_cap_string(revoking)); if (cap == ci->i_auth_cap && (cap->issued & CEPH_CAP_FILE_WR)) { @@ -1942,6 +1943,35 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, } } +static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + struct inode *inode) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_cap *cap; + int delayed = 0; + + spin_lock(&inode->i_lock); + cap = ci->i_auth_cap; + dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode, + ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq); + __ceph_flush_snaps(ci, &session, 1); + if (ci->i_flushing_caps) { + delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, + __ceph_caps_used(ci), + __ceph_caps_wanted(ci), + cap->issued | cap->implemented, + ci->i_flushing_caps, NULL); + if (delayed) { + spin_lock(&inode->i_lock); + __cap_delay_requeue(mdsc, ci); + spin_unlock(&inode->i_lock); + } + } else { + spin_unlock(&inode->i_lock); + } +} + /* * Take references to capabilities we hold, so that we don't release @@ -2689,7 +2719,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, ceph_add_cap(inode, session, cap_id, -1, issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH, NULL /* no caps context */); - try_flush_caps(inode, session, NULL); + kick_flushing_inode_caps(mdsc, session, inode); up_read(&mdsc->snap_rwsem); /* make sure we re-request max_size, if necessary */ -- cgit v1.2.3 From 7e57b81c7688c762bc9e775bc83f9fc17946f527 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 18 Jan 2011 09:00:01 -0800 Subject: ceph: avoid immediate cap check after import The NODELAY flag avoids the heuristics that delay cap (issued/wanted) release. There's no reason for that after we import a cap, and it kills whatever benefit we get from those delays. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7def3f5903dd..6b61ded701e1 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2817,8 +2817,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, case CEPH_CAP_OP_IMPORT: handle_cap_import(mdsc, inode, h, session, snaptrace, snaptrace_len); - ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, - session); + ceph_check_caps(ceph_inode(inode), 0, session); goto done_unlocked; } -- cgit v1.2.3 From 0ca7a5b9ac5d301845dd6382ff25a699b6263a81 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 21 Jan 2011 16:40:31 +0900 Subject: nilfs2: fix crash after one superblock became unavailable Fixes the following kernel oops in nilfs_setup_super() which could arise if one of two super-blocks is unavailable. > BUG: unable to handle kernel NULL pointer dereference at (null) > Pid: 3529, comm: mount.nilfs2 Not tainted 2.6.37 #1 / > EIP: 0060:[] EFLAGS: 00010202 CPU: 3 > EIP is at memcpy+0xc/0x1b > Call Trace: > [] ? nilfs_setup_super+0x6c/0xa5 [nilfs2] > [] ? nilfs_get_root_dentry+0x81/0xcb [nilfs2] > [] ? nilfs_mount+0x4f9/0x62c [nilfs2] > [] ? kstrdup+0x36/0x3f > [] ? nilfs_mount+0x0/0x62c [nilfs2] > [] ? vfs_kern_mount+0x4d/0x12c > [] ? get_fs_type+0x76/0x8f > [] ? do_kern_mount+0x33/0xbf > [] ? do_mount+0x2ed/0x714 > [] ? copy_mount_options+0x28/0xfc > [] ? sys_mount+0x72/0xaf > [] ? syscall_call+0x7/0xb Reported-by: Wakko Warner Signed-off-by: Ryusuke Konishi Tested-by: Wakko Warner Cc: stable [2.6.37, 2.6.36] LKML-Reference: <20110121024918.GA29598@animx.eu.org> --- fs/nilfs2/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 0994f6a76c07..58fd707174e1 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -704,7 +704,8 @@ skip_mount_setup: sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS); /* synchronize sbp[1] with sbp[0] */ - memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); + if (sbp[1]) + memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); } -- cgit v1.2.3 From d66bbd441c08fe00ed2add1cf70cb243ebc2b27e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 21 Jan 2011 21:16:46 -0800 Subject: ceph: avoid picking MDS that is not active Ignore replication or auth frag data if it indicates an MDS that is not active. This can happen if the MDS shuts down and the client has stale data about the namespace distribution across the MDS cluster. If that's the case, fall back to directing the request based on the auth cap (which should always be accurate). Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 509339ceef72..a6949cc7c69a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -693,9 +693,11 @@ static int __choose_mds(struct ceph_mds_client *mdsc, dout("choose_mds %p %llx.%llx " "frag %u mds%d (%d/%d)\n", inode, ceph_vinop(inode), - frag.frag, frag.mds, + frag.frag, mds, (int)r, frag.ndist); - return mds; + if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= + CEPH_MDS_STATE_ACTIVE) + return mds; } /* since this file/dir wasn't known to be @@ -708,7 +710,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc, dout("choose_mds %p %llx.%llx " "frag %u mds%d (auth)\n", inode, ceph_vinop(inode), frag.frag, mds); - return mds; + if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= + CEPH_MDS_STATE_ACTIVE) + return mds; } } } -- cgit v1.2.3 From 3689456b4bd36027022b3215eb2acba51cd0e6b5 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Tue, 25 Jan 2011 15:07:34 -0800 Subject: squashfs: fix use of uninitialised variable in zlib & xz decompressors Fix potential use of uninitialised variable caused by recent decompressor code optimisations. In zlib_uncompress (zlib_wrapper.c) we have int zlib_err, zlib_init = 0; ... do { ... if (avail == 0) { offset = 0; put_bh(bh[k++]); continue; } ... zlib_err = zlib_inflate(stream, Z_SYNC_FLUSH); ... } while (zlib_err == Z_OK); If continue is executed (avail == 0) then the while condition will be evaluated testing zlib_err, which is uninitialised first time around the loop. Fix this by getting rid of the 'if (avail == 0)' condition test, this edge condition should not be being handled in the decompressor code, and instead handle it generically in the caller code. Similarly for xz_wrapper.c. Incidentally, on most architectures (bar Mips and Parisc), no uninitialised variable warning is generated by gcc, this is because the while condition test on continue is optimised out and not performed (when executing continue zlib_err has not been changed since entering the loop, and logically if the while condition was true previously, then it's still true). Signed-off-by: Phillip Lougher Reported-by: Jesper Juhl Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/squashfs/block.c | 8 ++++++++ fs/squashfs/xz_wrapper.c | 6 ------ fs/squashfs/zlib_wrapper.c | 6 ------ 3 files changed, 8 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 2fb2882f0fa7..8ab48bc2fa7d 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -63,6 +63,14 @@ static struct buffer_head *get_block_length(struct super_block *sb, *length = (unsigned char) bh->b_data[*offset] | (unsigned char) bh->b_data[*offset + 1] << 8; *offset += 2; + + if (*offset == msblk->devblksize) { + put_bh(bh); + bh = sb_bread(sb, ++(*cur_index)); + if (bh == NULL) + return NULL; + *offset = 0; + } } return bh; diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index 856756ca5ee4..c4eb40018256 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -95,12 +95,6 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, if (!buffer_uptodate(bh[k])) goto release_mutex; - if (avail == 0) { - offset = 0; - put_bh(bh[k++]); - continue; - } - stream->buf.in = bh[k]->b_data + offset; stream->buf.in_size = avail; stream->buf.in_pos = 0; diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 818a5e063faf..4661ae2b1cec 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -82,12 +82,6 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, if (!buffer_uptodate(bh[k])) goto release_mutex; - if (avail == 0) { - offset = 0; - put_bh(bh[k++]); - continue; - } - stream->next_in = bh[k]->b_data + offset; stream->avail_in = avail; offset = 0; -- cgit v1.2.3 From ac751efa6a0d70f2c9daef5c7e3a92270f5c2dff Mon Sep 17 00:00:00 2001 From: Torben Hohn Date: Tue, 25 Jan 2011 15:07:35 -0800 Subject: console: rename acquire/release_console_sem() to console_lock/unlock() The -rt patches change the console_semaphore to console_mutex. As a result, a quite large chunk of the patches changes all acquire/release_console_sem() to acquire/release_console_mutex() This commit makes things use more neutral function names which dont make implications about the underlying lock. The only real change is the return value of console_trylock which is inverted from try_acquire_console_sem() This patch also paves the way to switching console_sem from a semaphore to a mutex. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: make console_trylock return 1 on success, per Geert] Signed-off-by: Torben Hohn Cc: Thomas Gleixner Cc: Greg KH Cc: Ingo Molnar Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/consoles.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c index eafc22ab1fdd..b701eaa482bf 100644 --- a/fs/proc/consoles.c +++ b/fs/proc/consoles.c @@ -67,7 +67,7 @@ static void *c_start(struct seq_file *m, loff_t *pos) struct console *con; loff_t off = 0; - acquire_console_sem(); + console_lock(); for_each_console(con) if (off++ == *pos) break; @@ -84,7 +84,7 @@ static void *c_next(struct seq_file *m, void *v, loff_t *pos) static void c_stop(struct seq_file *m, void *v) { - release_console_sem(); + console_unlock(); } static const struct seq_operations consoles_op = { -- cgit v1.2.3 From af5eb745efe97d91d2cbe793029838b3311c15da Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Fri, 28 Jan 2011 20:45:28 +0000 Subject: NTFS: Fix invalid pointer dereference in ntfs_mft_record_alloc(). In ntfs_mft_record_alloc() when mapping the new extent mft record with map_extent_mft_record() we overwrite @m with the return value and on error, we then try to use the old @m but that is no longer there as @m now contains an error code instead so we crash when dereferencing the error code as if it were a pointer. The simple fix is to use a temporary variable to store the return value thus preserving the original @m for later use. This is a backport from the commercial Tuxera-NTFS driver and is well tested... Thanks go to Julia Lawall for pointing this out (whilst I had fixed it in the commercial driver I had failed to fix it in the Linux kernel). Signed-off-by: Anton Altaparmakov Signed-off-by: Linus Torvalds --- fs/ntfs/mft.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index b572b6727181..326e7475a22a 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -1,7 +1,7 @@ /** * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2006 Anton Altaparmakov + * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc. * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -2576,6 +2576,8 @@ mft_rec_already_initialized: flush_dcache_page(page); SetPageUptodate(page); if (base_ni) { + MFT_RECORD *m_tmp; + /* * Setup the base mft record in the extent mft record. This * completes initialization of the allocated extent mft record @@ -2588,11 +2590,11 @@ mft_rec_already_initialized: * attach it to the base inode @base_ni and map, pin, and lock * its, i.e. the allocated, mft record. */ - m = map_extent_mft_record(base_ni, bit, &ni); - if (IS_ERR(m)) { + m_tmp = map_extent_mft_record(base_ni, bit, &ni); + if (IS_ERR(m_tmp)) { ntfs_error(vol->sb, "Failed to map allocated extent " "mft record 0x%llx.", (long long)bit); - err = PTR_ERR(m); + err = PTR_ERR(m_tmp); /* Set the mft record itself not in use. */ m->flags &= cpu_to_le16( ~le16_to_cpu(MFT_RECORD_IN_USE)); @@ -2603,6 +2605,7 @@ mft_rec_already_initialized: ntfs_unmap_page(page); goto undo_mftbmp_alloc; } + BUG_ON(m != m_tmp); /* * Make sure the allocated mft record is written out to disk. * No need to set the inode dirty because the caller is going -- cgit v1.2.3