diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-05-05 15:17:02 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-05-05 15:17:02 -0700 |
commit | 5575eeb7b9f687ca4899e2d8721a9b17265d0060 (patch) | |
tree | f105b068410e48f5b2d65e85802a9d4bfa3e0638 | |
parent | 0624bcaaf06c1fe5aca4e72287a3f13026764d36 (diff) | |
parent | 3bd58143bafc56dbc07f4f085e4d7e018d332674 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph fixes from Sage Weil:
"First, there is a critical fix for the new primary-affinity function
that went into -rc1.
The second batch of patches from Zheng fix a range of problems with
directory fragmentation, readdir, and a few odds and ends for cephfs"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
ceph: reserve caps for file layout/lock MDS requests
ceph: avoid releasing caps that are being used
ceph: clear directory's completeness when creating file
libceph: fix non-default values check in apply_primary_affinity()
ceph: use fpos_cmp() to compare dentry positions
ceph: check directory's completeness before emitting directory entry
-rw-r--r-- | fs/ceph/caps.c | 2 | ||||
-rw-r--r-- | fs/ceph/dir.c | 33 | ||||
-rw-r--r-- | fs/ceph/inode.c | 71 | ||||
-rw-r--r-- | fs/ceph/ioctl.c | 3 | ||||
-rw-r--r-- | fs/ceph/locks.c | 1 | ||||
-rw-r--r-- | fs/ceph/super.h | 1 | ||||
-rw-r--r-- | net/ceph/osdmap.c | 9 |
7 files changed, 44 insertions, 76 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 2e5e648eb5c3..c561b628ebce 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3261,7 +3261,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode, rel->seq = cpu_to_le32(cap->seq); rel->issue_seq = cpu_to_le32(cap->issue_seq), rel->mseq = cpu_to_le32(cap->mseq); - rel->caps = cpu_to_le32(cap->issued); + rel->caps = cpu_to_le32(cap->implemented); rel->wanted = cpu_to_le32(cap->mds_wanted); rel->dname_len = 0; rel->dname_seq = 0; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 766410a12c2c..c29d6ae68874 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -141,7 +141,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, /* start at beginning? */ if (ctx->pos == 2 || last == NULL || - ctx->pos < ceph_dentry(last)->offset) { + fpos_cmp(ctx->pos, ceph_dentry(last)->offset) < 0) { if (list_empty(&parent->d_subdirs)) goto out_unlock; p = parent->d_subdirs.prev; @@ -182,9 +182,16 @@ more: spin_unlock(&dentry->d_lock); spin_unlock(&parent->d_lock); + /* make sure a dentry wasn't dropped while we didn't have parent lock */ + if (!ceph_dir_is_complete(dir)) { + dout(" lost dir complete on %p; falling back to mds\n", dir); + dput(dentry); + err = -EAGAIN; + goto out; + } + dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, ctx->pos, dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); - ctx->pos = di->offset; if (!dir_emit(ctx, dentry->d_name.name, dentry->d_name.len, ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino), @@ -198,19 +205,12 @@ more: return 0; } + ctx->pos = di->offset + 1; + if (last) dput(last); last = dentry; - ctx->pos++; - - /* make sure a dentry wasn't dropped while we didn't have parent lock */ - if (!ceph_dir_is_complete(dir)) { - dout(" lost dir complete on %p; falling back to mds\n", dir); - err = -EAGAIN; - goto out; - } - spin_lock(&parent->d_lock); p = p->prev; /* advance to next dentry */ goto more; @@ -296,6 +296,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) err = __dcache_readdir(file, ctx, shared_gen); if (err != -EAGAIN) return err; + frag = fpos_frag(ctx->pos); + off = fpos_off(ctx->pos); } else { spin_unlock(&ci->i_ceph_lock); } @@ -446,7 +448,6 @@ more: if (atomic_read(&ci->i_release_count) == fi->dir_release_count) { dout(" marking %p complete\n", inode); __ceph_dir_set_complete(ci, fi->dir_release_count); - ci->i_max_offset = ctx->pos; } spin_unlock(&ci->i_ceph_lock); @@ -935,14 +936,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, * to do it here. */ - /* d_move screws up d_subdirs order */ - ceph_dir_clear_complete(new_dir); - d_move(old_dentry, new_dentry); /* ensure target dentry is invalidated, despite rehashing bug in vfs_rename_dir */ ceph_invalidate_dentry_lease(new_dentry); + + /* d_move screws up sibling dentries' offsets */ + ceph_dir_clear_complete(old_dir); + ceph_dir_clear_complete(new_dir); + } ceph_mdsc_put_request(req); return err; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 0b0728e5be2d..233c6f96910a 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -744,7 +744,6 @@ static int fill_inode(struct inode *inode, !__ceph_dir_is_complete(ci)) { dout(" marking %p complete (empty)\n", inode); __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); - ci->i_max_offset = 2; } no_change: /* only update max_size on auth cap */ @@ -890,41 +889,6 @@ out_unlock: } /* - * Set dentry's directory position based on the current dir's max, and - * order it in d_subdirs, so that dcache_readdir behaves. - * - * Always called under directory's i_mutex. - */ -static void ceph_set_dentry_offset(struct dentry *dn) -{ - struct dentry *dir = dn->d_parent; - struct inode *inode = dir->d_inode; - struct ceph_inode_info *ci; - struct ceph_dentry_info *di; - - BUG_ON(!inode); - - ci = ceph_inode(inode); - di = ceph_dentry(dn); - - spin_lock(&ci->i_ceph_lock); - if (!__ceph_dir_is_complete(ci)) { - spin_unlock(&ci->i_ceph_lock); - return; - } - di->offset = ceph_inode(inode)->i_max_offset++; - spin_unlock(&ci->i_ceph_lock); - - spin_lock(&dir->d_lock); - spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); - list_move(&dn->d_u.d_child, &dir->d_subdirs); - dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, - dn->d_u.d_child.prev, dn->d_u.d_child.next); - spin_unlock(&dn->d_lock); - spin_unlock(&dir->d_lock); -} - -/* * splice a dentry to an inode. * caller must hold directory i_mutex for this to be safe. * @@ -933,7 +897,7 @@ static void ceph_set_dentry_offset(struct dentry *dn) * the caller) if we fail. */ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, - bool *prehash, bool set_offset) + bool *prehash) { struct dentry *realdn; @@ -965,8 +929,6 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, } if ((!prehash || *prehash) && d_unhashed(dn)) d_rehash(dn); - if (set_offset) - ceph_set_dentry_offset(dn); out: return dn; } @@ -987,7 +949,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, { struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; struct inode *in = NULL; - struct ceph_mds_reply_inode *ininfo; struct ceph_vino vino; struct ceph_fs_client *fsc = ceph_sb_to_client(sb); int err = 0; @@ -1161,6 +1122,9 @@ retry_lookup: /* rename? */ if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) { + struct inode *olddir = req->r_old_dentry_dir; + BUG_ON(!olddir); + dout(" src %p '%.*s' dst %p '%.*s'\n", req->r_old_dentry, req->r_old_dentry->d_name.len, @@ -1180,13 +1144,10 @@ retry_lookup: rehashing bug in vfs_rename_dir */ ceph_invalidate_dentry_lease(dn); - /* - * d_move() puts the renamed dentry at the end of - * d_subdirs. We need to assign it an appropriate - * directory offset so we can behave when dir is - * complete. - */ - ceph_set_dentry_offset(req->r_old_dentry); + /* d_move screws up sibling dentries' offsets */ + ceph_dir_clear_complete(dir); + ceph_dir_clear_complete(olddir); + dout("dn %p gets new offset %lld\n", req->r_old_dentry, ceph_dentry(req->r_old_dentry)->offset); @@ -1213,8 +1174,9 @@ retry_lookup: /* attach proper inode */ if (!dn->d_inode) { + ceph_dir_clear_complete(dir); ihold(in); - dn = splice_dentry(dn, in, &have_lease, true); + dn = splice_dentry(dn, in, &have_lease); if (IS_ERR(dn)) { err = PTR_ERR(dn); goto done; @@ -1235,17 +1197,16 @@ retry_lookup: (req->r_op == CEPH_MDS_OP_LOOKUPSNAP || req->r_op == CEPH_MDS_OP_MKSNAP)) { struct dentry *dn = req->r_dentry; + struct inode *dir = req->r_locked_dir; /* fill out a snapdir LOOKUPSNAP dentry */ BUG_ON(!dn); - BUG_ON(!req->r_locked_dir); - BUG_ON(ceph_snap(req->r_locked_dir) != CEPH_SNAPDIR); - ininfo = rinfo->targeti.in; - vino.ino = le64_to_cpu(ininfo->ino); - vino.snap = le64_to_cpu(ininfo->snapid); + BUG_ON(!dir); + BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR); dout(" linking snapped dir %p to dn %p\n", in, dn); + ceph_dir_clear_complete(dir); ihold(in); - dn = splice_dentry(dn, in, NULL, true); + dn = splice_dentry(dn, in, NULL); if (IS_ERR(dn)) { err = PTR_ERR(dn); goto done; @@ -1407,7 +1368,7 @@ retry_lookup: } if (!dn->d_inode) { - dn = splice_dentry(dn, in, NULL, false); + dn = splice_dentry(dn, in, NULL); if (IS_ERR(dn)) { err = PTR_ERR(dn); dn = NULL; diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index fdf941b44ff1..a822a6e58290 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -109,6 +109,8 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) return PTR_ERR(req); req->r_inode = inode; ihold(inode); + req->r_num_caps = 1; + req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL; req->r_args.setlayout.layout.fl_stripe_unit = @@ -153,6 +155,7 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg) return PTR_ERR(req); req->r_inode = inode; ihold(inode); + req->r_num_caps = 1; req->r_args.setlayout.layout.fl_stripe_unit = cpu_to_le32(l.stripe_unit); diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index d94ba0df9f4d..191398852a2e 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -45,6 +45,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, return PTR_ERR(req); req->r_inode = inode; ihold(inode); + req->r_num_caps = 1; /* mds requires start and length rather than start and end */ if (LLONG_MAX == fl->fl_end) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 7866cd05a6bb..ead05cc1f447 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -266,7 +266,6 @@ struct ceph_inode_info { struct timespec i_rctime; u64 i_rbytes, i_rfiles, i_rsubdirs; u64 i_files, i_subdirs; - u64 i_max_offset; /* largest readdir offset, set with complete dir */ struct rb_root i_fragtree; struct mutex i_fragtree_mutex; diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index e632b5a52f5b..8b8a5a24b223 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1548,8 +1548,10 @@ static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps, return; for (i = 0; i < len; i++) { - if (osds[i] != CRUSH_ITEM_NONE && - osdmap->osd_primary_affinity[i] != + int osd = osds[i]; + + if (osd != CRUSH_ITEM_NONE && + osdmap->osd_primary_affinity[osd] != CEPH_OSD_DEFAULT_PRIMARY_AFFINITY) { break; } @@ -1563,10 +1565,9 @@ static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps, * osd's pgs get rejected as primary. */ for (i = 0; i < len; i++) { - int osd; + int osd = osds[i]; u32 aff; - osd = osds[i]; if (osd == CRUSH_ITEM_NONE) continue; |