Diffstat (limited to 'fs/dcache.c')
-rw-r--r-- | fs/dcache.c | 200
1 files changed, 143 insertions, 57 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index b60ddc41d783..8c1ab8fb5012 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -141,18 +141,25 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
  * Compare 2 name strings, return 0 if they match, otherwise non-zero.
  * The strings are both count bytes long, and count is non-zero.
  */
-static inline int dentry_cmp(const unsigned char *cs, size_t scount,
-				const unsigned char *ct, size_t tcount)
-{
 #ifdef CONFIG_DCACHE_WORD_ACCESS
-	unsigned long a,b,mask;
 
-	if (unlikely(scount != tcount))
-		return 1;
+#include <asm/word-at-a-time.h>
+/*
+ * NOTE! 'cs' and 'scount' come from a dentry, so it has a
+ * aligned allocation for this particular component. We don't
+ * strictly need the load_unaligned_zeropad() safety, but it
+ * doesn't hurt either.
+ *
+ * In contrast, 'ct' and 'tcount' can be from a pathname, and do
+ * need the careful unaligned handling.
+ */
+static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
+{
+	unsigned long a,b,mask;
 
 	for (;;) {
 		a = *(unsigned long *)cs;
-		b = *(unsigned long *)ct;
+		b = load_unaligned_zeropad(ct);
 		if (tcount < sizeof(unsigned long))
 			break;
 		if (unlikely(a != b))
@@ -165,10 +172,12 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
 	}
 	mask = ~(~0ul << tcount*8);
 	return unlikely(!!((a ^ b) & mask));
+}
+
 #else
-	if (scount != tcount)
-		return 1;
 
+static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
+{
 	do {
 		if (*cs != *ct)
 			return 1;
@@ -177,7 +186,32 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
 		tcount--;
 	} while (tcount);
 	return 0;
+}
+
 #endif
+
+static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
+{
+	const unsigned char *cs;
+	/*
+	 * Be careful about RCU walk racing with rename:
+	 * use ACCESS_ONCE to fetch the name pointer.
+	 *
+	 * NOTE! Even if a rename will mean that the length
+	 * was not loaded atomically, we don't care. The
+	 * RCU walk will check the sequence count eventually,
+	 * and catch it. And we won't overrun the buffer,
+	 * because we're reading the name pointer atomically,
+	 * and a dentry name is guaranteed to be properly
+	 * terminated with a NUL byte.
+	 *
+	 * End result: even if 'len' is wrong, we'll exit
+	 * early because the data cannot match (there can
+	 * be no NUL in the ct/tcount data)
+	 */
+	cs = ACCESS_ONCE(dentry->d_name.name);
+	smp_read_barrier_depends();
+	return dentry_string_cmp(cs, ct, tcount);
 }
 
 static void __d_free(struct rcu_head *head)
@@ -1240,6 +1274,13 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 	if (!dentry)
 		return NULL;
 
+	/*
+	 * We guarantee that the inline name is always NUL-terminated.
+	 * This way the memcpy() done by the name switching in rename
+	 * will still always have a NUL at the end, even if we might
+	 * be overwriting an internal NUL character
+	 */
+	dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
 	if (name->len > DNAME_INLINE_LEN-1) {
 		dname = kmalloc(name->len + 1, GFP_KERNEL);
 		if (!dname) {
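The dentry_string_cmp() hunks above compare names one unsigned long at a time and mask off the bytes past the end of the name in the final word. The following stand-alone user-space sketch (not kernel code: memcpy() stands in for the raw word loads and for load_unaligned_zeropad(), and the tail mask assumes a little-endian machine) shows the same tail-masking idea:

#include <stdio.h>
#include <string.h>

/* Compare 'tcount' bytes of cs and ct a word at a time; the caller must
 * guarantee that reading up to the next word boundary is safe, which the
 * kernel arranges via dentry allocation rules and load_unaligned_zeropad(). */
static int word_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
	unsigned long a, b, mask;

	for (;;) {
		memcpy(&a, cs, sizeof(a));	/* stand-in for the raw word load */
		memcpy(&b, ct, sizeof(b));
		if (tcount < sizeof(unsigned long))
			break;
		if (a != b)
			return 1;
		cs += sizeof(unsigned long);
		ct += sizeof(unsigned long);
		tcount -= sizeof(unsigned long);
		if (!tcount)
			return 0;
	}
	/* keep only the low 'tcount' bytes of the final words (little-endian) */
	mask = ~(~0ul << tcount * 8);
	return !!((a ^ b) & mask);
}

int main(void)
{
	/* "foobar" vs "foobaz": they differ only in the partial last word */
	char x[16] = "foobar", y[16] = "foobaz";

	printf("%d %d\n",
	       word_cmp((unsigned char *)x, (unsigned char *)y, 6),
	       word_cmp((unsigned char *)x, (unsigned char *)y, 5));
	return 0;
}

Compiled with any C99 compiler this prints "1 0": the names differ when all six bytes are compared, but match once the byte past the shorter length is masked away.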
@@ -1249,13 +1290,16 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 	} else {
 		dname = dentry->d_iname;
 	}
-	dentry->d_name.name = dname;
 
 	dentry->d_name.len = name->len;
 	dentry->d_name.hash = name->hash;
 	memcpy(dname, name->name, name->len);
 	dname[name->len] = 0;
 
+	/* Make sure we always see the terminating NUL character */
+	smp_wmb();
+	dentry->d_name.name = dname;
+
 	dentry->d_count = 1;
 	dentry->d_flags = 0;
 	spin_lock_init(&dentry->d_lock);
@@ -1421,18 +1465,18 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
 	}
 
 	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
-		struct qstr *qstr = &alias->d_name;
-
 		/*
 		 * Don't need alias->d_lock here, because aliases with
 		 * d_parent == entry->d_parent are not subject to name or
 		 * parent changes, because the parent inode i_mutex is held.
 		 */
-		if (qstr->hash != hash)
+		if (alias->d_name.hash != hash)
 			continue;
 		if (alias->d_parent != entry->d_parent)
 			continue;
-		if (dentry_cmp(qstr->name, qstr->len, name, len))
+		if (alias->d_name.len != len)
+			continue;
+		if (dentry_cmp(alias, name, len))
 			continue;
 		__dget(alias);
 		return alias;
@@ -1471,7 +1515,7 @@ struct dentry *d_make_root(struct inode *root_inode)
 	struct dentry *res = NULL;
 
 	if (root_inode) {
-		static const struct qstr name = { .name = "/", .len = 1 };
+		static const struct qstr name = QSTR_INIT("/", 1);
 
 		res = __d_alloc(root_inode->i_sb, &name);
 		if (res)
@@ -1709,6 +1753,48 @@ err_out:
 }
 EXPORT_SYMBOL(d_add_ci);
 
+/*
+ * Do the slow-case of the dentry name compare.
+ *
+ * Unlike the dentry_cmp() function, we need to atomically
+ * load the name, length and inode information, so that the
+ * filesystem can rely on them, and can use the 'name' and
+ * 'len' information without worrying about walking off the
+ * end of memory etc.
+ *
+ * Thus the read_seqcount_retry() and the "duplicate" info
+ * in arguments (the low-level filesystem should not look
+ * at the dentry inode or name contents directly, since
+ * rename can change them while we're in RCU mode).
+ */
+enum slow_d_compare {
+	D_COMP_OK,
+	D_COMP_NOMATCH,
+	D_COMP_SEQRETRY,
+};
+
+static noinline enum slow_d_compare slow_dentry_cmp(
+		const struct dentry *parent,
+		struct inode *inode,
+		struct dentry *dentry,
+		unsigned int seq,
+		const struct qstr *name)
+{
+	int tlen = dentry->d_name.len;
+	const char *tname = dentry->d_name.name;
+	struct inode *i = dentry->d_inode;
+
+	if (read_seqcount_retry(&dentry->d_seq, seq)) {
+		cpu_relax();
+		return D_COMP_SEQRETRY;
+	}
+	if (parent->d_op->d_compare(parent, inode,
+				dentry, i,
+				tlen, tname, name))
+		return D_COMP_NOMATCH;
+	return D_COMP_OK;
+}
+
 /**
  * __d_lookup_rcu - search for a dentry (racy, store-free)
  * @parent: parent dentry
@@ -1735,15 +1821,17 @@ EXPORT_SYMBOL(d_add_ci);
  * the returned dentry, so long as its parent's seqlock is checked after the
  * child is looked up. Thus, an interlocking stepping of sequence lock checks
  * is formed, giving integrity down the path walk.
+ *
+ * NOTE! The caller *has* to check the resulting dentry against the sequence
+ * number we've returned before using any of the resulting dentry state!
  */
 struct dentry *__d_lookup_rcu(const struct dentry *parent,
 				const struct qstr *name,
-				unsigned *seqp, struct inode **inode)
+				unsigned *seqp, struct inode *inode)
 {
-	unsigned int len = name->len;
-	unsigned int hash = name->hash;
+	u64 hashlen = name->hash_len;
 	const unsigned char *str = name->name;
-	struct hlist_bl_head *b = d_hash(parent, hash);
+	struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen));
 	struct hlist_bl_node *node;
 	struct dentry *dentry;
 
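The __d_alloc() hunk above is one half of a publish/consume pairing: the name buffer, NUL included, is fully written before smp_wmb() and the store to d_name.name, and dentry_cmp() on the reader side fetches the pointer with ACCESS_ONCE() plus smp_read_barrier_depends() before touching the bytes. A rough user-space analogue of that ordering, using C11 release/consume atomics as stand-ins for the kernel primitives (illustrative only; the struct and function names here are made up):

#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

struct fake_dentry {			/* illustrative stand-in, not struct dentry */
	_Atomic(const char *) d_name;	/* published name pointer */
	char d_iname[32];		/* inline name buffer */
};

static void publish_name(struct fake_dentry *d, const char *src)
{
	strcpy(d->d_iname, src);	/* fill the buffer first, NUL included */
	/* ~ smp_wmb(); dentry->d_name.name = dname; */
	atomic_store_explicit(&d->d_name, d->d_iname, memory_order_release);
}

static void read_name(struct fake_dentry *d, char *out, size_t outlen)
{
	/* ~ ACCESS_ONCE() + smp_read_barrier_depends() */
	const char *s = atomic_load_explicit(&d->d_name, memory_order_consume);

	if (s)
		snprintf(out, outlen, "%s", s);	/* safe: buffer was complete before the pointer was visible */
	else
		snprintf(out, outlen, "(unset)");
}

int main(void)
{
	struct fake_dentry d = { 0 };
	char buf[32];

	read_name(&d, buf, sizeof(buf));
	printf("%s\n", buf);

	publish_name(&d, "dentry-name");
	read_name(&d, buf, sizeof(buf));
	printf("%s\n", buf);
	return 0;
}

The point is the same as in the hunk: a reader can only ever dereference a pointer to a buffer that was already complete and NUL-terminated at the moment the pointer became visible.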
@@ -1769,49 +1857,47 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
 	 */
 	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
 		unsigned seq;
-		struct inode *i;
-		const char *tname;
-		int tlen;
-
-		if (dentry->d_name.hash != hash)
-			continue;
 
 seqretry:
-		seq = read_seqcount_begin(&dentry->d_seq);
+		/*
+		 * The dentry sequence count protects us from concurrent
+		 * renames, and thus protects inode, parent and name fields.
+		 *
+		 * The caller must perform a seqcount check in order
+		 * to do anything useful with the returned dentry,
+		 * including using the 'd_inode' pointer.
+		 *
+		 * NOTE! We do a "raw" seqcount_begin here. That means that
+		 * we don't wait for the sequence count to stabilize if it
+		 * is in the middle of a sequence change. If we do the slow
+		 * dentry compare, we will do seqretries until it is stable,
+		 * and if we end up with a successful lookup, we actually
+		 * want to exit RCU lookup anyway.
+		 */
+		seq = raw_seqcount_begin(&dentry->d_seq);
 		if (dentry->d_parent != parent)
 			continue;
 		if (d_unhashed(dentry))
 			continue;
-		tlen = dentry->d_name.len;
-		tname = dentry->d_name.name;
-		i = dentry->d_inode;
-		prefetch(tname);
-		/*
-		 * This seqcount check is required to ensure name and
-		 * len are loaded atomically, so as not to walk off the
-		 * edge of memory when walking. If we could load this
-		 * atomically some other way, we could drop this check.
-		 */
-		if (read_seqcount_retry(&dentry->d_seq, seq))
-			goto seqretry;
+		*seqp = seq;
+
 		if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
-			if (parent->d_op->d_compare(parent, *inode,
-						dentry, i,
-						tlen, tname, name))
+			if (dentry->d_name.hash != hashlen_hash(hashlen))
 				continue;
-		} else {
-			if (dentry_cmp(tname, tlen, str, len))
+			switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) {
+			case D_COMP_OK:
+				return dentry;
+			case D_COMP_NOMATCH:
 				continue;
+			default:
+				goto seqretry;
+			}
 		}
-		/*
-		 * No extra seqcount check is required after the name
-		 * compare. The caller must perform a seqcount check in
-		 * order to do anything useful with the returned dentry
-		 * anyway.
-		 */
-		*seqp = seq;
-		*inode = i;
-		return dentry;
+
+		if (dentry->d_name.hash_len != hashlen)
+			continue;
+		if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
+			return dentry;
 	}
 	return NULL;
 }
@@ -1890,8 +1976,6 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
 	rcu_read_lock();
 	
 	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
-		const char *tname;
-		int tlen;
 
 		if (dentry->d_name.hash != hash)
 			continue;
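__d_lookup_rcu() above now rejects candidates with a single 64-bit compare of d_name.hash_len against the caller's hashlen, instead of separate hash and length checks. A small illustrative sketch of why that works (user-space, with made-up helper names; the packing shown here, hash in the low 32 bits and length in the high 32 bits, mirrors the hashlen_hash()/hashlen_len() accessors used in the hunk):

#include <stdint.h>
#include <stdio.h>

/* Pack a 32-bit hash and 32-bit length into one 64-bit value. */
static inline uint64_t hashlen_pack(uint32_t hash, uint32_t len)
{
	return ((uint64_t)len << 32) | hash;
}

static inline uint32_t hashlen_hash_of(uint64_t hashlen) { return (uint32_t)hashlen; }
static inline uint32_t hashlen_len_of(uint64_t hashlen)  { return (uint32_t)(hashlen >> 32); }

int main(void)
{
	uint64_t a = hashlen_pack(0xdeadbeef, 6);	/* e.g. a 6-byte name */
	uint64_t b = hashlen_pack(0xdeadbeef, 7);	/* same hash, different length */

	/* One 64-bit compare covers both fields at once */
	printf("match=%d hash=%#x len=%u\n",
	       a == b, hashlen_hash_of(a), hashlen_len_of(a));
	return 0;
}

A candidate whose hash or length differs fails the single comparison, so the full byte-by-byte (or word-by-word) name compare only runs on entries that already agree on both.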
@@ -1906,15 +1990,17 @@
 		 * It is safe to compare names since d_move() cannot
 		 * change the qstr (protected by d_lock).
 		 */
-		tlen = dentry->d_name.len;
-		tname = dentry->d_name.name;
 		if (parent->d_flags & DCACHE_OP_COMPARE) {
+			int tlen = dentry->d_name.len;
+			const char *tname = dentry->d_name.name;
 			if (parent->d_op->d_compare(parent, parent->d_inode,
 						dentry, dentry->d_inode,
 						tlen, tname, name))
 				goto next;
 		} else {
-			if (dentry_cmp(tname, tlen, str, len))
+			if (dentry->d_name.len != len)
+				goto next;
+			if (dentry_cmp(dentry, str, len))
 				goto next;
 		}
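Both lookup paths above lean on the sequence-count retry idiom: raw_seqcount_begin() samples d_seq without waiting for a writer to finish, and slow_dentry_cmp()/read_seqcount_retry() force a retry if a rename raced with the read. A simplified user-space sketch of that control flow (assumed names, memory-ordering details deliberately simplified; this is not the kernel seqcount API):

#include <stdatomic.h>
#include <stdio.h>

struct seq_data {
	atomic_uint seq;	/* even = stable, odd = writer in progress */
	int a, b;		/* payload nominally protected by the sequence count */
};

/* Like raw_seqcount_begin(): take a snapshot rounded down to even, so an
 * in-progress write simply forces a retry instead of spinning here. */
static unsigned read_begin(struct seq_data *d)
{
	return atomic_load_explicit(&d->seq, memory_order_acquire) & ~1u;
}

/* Like read_seqcount_retry(): true if the snapshot is stale. */
static int read_retry(struct seq_data *d, unsigned snap)
{
	atomic_thread_fence(memory_order_acquire);
	return atomic_load_explicit(&d->seq, memory_order_relaxed) != snap;
}

static void write_pair(struct seq_data *d, int a, int b)
{
	atomic_fetch_add_explicit(&d->seq, 1, memory_order_acq_rel);	/* -> odd */
	d->a = a;
	d->b = b;
	atomic_fetch_add_explicit(&d->seq, 1, memory_order_release);	/* -> even */
}

int main(void)
{
	struct seq_data d = { .a = 1, .b = 2 };
	unsigned snap;
	int a, b;

	do {				/* reader: retry until a consistent snapshot is seen */
		snap = read_begin(&d);
		a = d.a;
		b = d.b;
	} while (read_retry(&d, snap));

	printf("a=%d b=%d\n", a, b);
	write_pair(&d, 3, 4);
	printf("after write: a=%d b=%d\n", d.a, d.b);
	return 0;
}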