summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h5
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h4
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c54
3 files changed, 62 insertions, 1 deletions
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index d74040fac343..177927a8f007 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -38,6 +38,9 @@
* locking semantics of the file system using the protocol. It should
* be somewhere else, I'm sure, but right now it isn't.
*
+ * New in version 7:
+ * - DLM join domain includes the live nodemap
+ *
* New in version 6:
* - DLM lockres remote refcount fixes.
*
@@ -54,7 +57,7 @@
* - full 64 bit i_size in the metadata lock lvbs
* - introduction of "rw" lock and pushing meta/data locking down
*/
-#define O2NET_PROTOCOL_VERSION 6ULL
+#define O2NET_PROTOCOL_VERSION 7ULL
struct o2net_handshake {
__be64 protocol_version;
__be64 connector_id;
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 2f4f5d4edb07..e90b92f9ece1 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -625,12 +625,16 @@ struct dlm_begin_reco
};
+#define BITS_PER_BYTE 8
+#define BITS_TO_BYTES(bits) (((bits)+BITS_PER_BYTE-1)/BITS_PER_BYTE)
+
struct dlm_query_join_request
{
u8 node_idx;
u8 pad1[2];
u8 name_len;
u8 domain[O2NM_MAX_NAME_LEN];
+ u8 node_map[BITS_TO_BYTES(O2NM_MAX_NODES)];
};
struct dlm_assert_joined
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 6590e1bca23c..19b57a6bcb1a 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -48,6 +48,36 @@
#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN)
#include "cluster/masklog.h"
+/*
+ * ocfs2 node maps are array of long int, which limits to send them freely
+ * across the wire due to endianness issues. To workaround this, we convert
+ * long ints to byte arrays. Following 3 routines are helper functions to
+ * set/test/copy bits within those array of bytes
+ */
+static inline void byte_set_bit(u8 nr, u8 map[])
+{
+ map[nr >> 3] |= (1UL << (nr & 7));
+}
+
+static inline int byte_test_bit(u8 nr, u8 map[])
+{
+ return ((1UL << (nr & 7)) & (map[nr >> 3])) != 0;
+}
+
+static inline void byte_copymap(u8 dmap[], unsigned long smap[],
+ unsigned int sz)
+{
+ unsigned int nn;
+
+ if (!sz)
+ return;
+
+ memset(dmap, 0, ((sz + 7) >> 3));
+ for (nn = 0 ; nn < sz; nn++)
+ if (test_bit(nn, smap))
+ byte_set_bit(nn, dmap);
+}
+
static void dlm_free_pagevec(void **vec, int pages)
{
while (pages--)
@@ -641,6 +671,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
struct dlm_query_join_request *query;
enum dlm_query_join_response response;
struct dlm_ctxt *dlm = NULL;
+ u8 nodenum;
query = (struct dlm_query_join_request *) msg->buf;
@@ -664,6 +695,25 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
spin_lock(&dlm_domain_lock);
dlm = __dlm_lookup_domain_full(query->domain, query->name_len);
+ if (!dlm)
+ goto unlock_respond;
+
+ /*
+ * There is a small window where the joining node may not see the
+ * node(s) that just left but still part of the cluster. DISALLOW
+ * join request if joining node has different node map.
+ */
+ nodenum=0;
+ while (nodenum < O2NM_MAX_NODES) {
+ if (test_bit(nodenum, dlm->domain_map)) {
+ if (!byte_test_bit(nodenum, query->node_map)) {
+ response = JOIN_DISALLOW;
+ goto unlock_respond;
+ }
+ }
+ nodenum++;
+ }
+
/* Once the dlm ctxt is marked as leaving then we don't want
* to be put in someone's domain map.
* Also, explicitly disallow joining at certain troublesome
@@ -705,6 +755,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
spin_unlock(&dlm->spinlock);
}
+unlock_respond:
spin_unlock(&dlm_domain_lock);
respond:
@@ -854,6 +905,9 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
join_msg.name_len = strlen(dlm->name);
memcpy(join_msg.domain, dlm->name, join_msg.name_len);
+ /* copy live node map to join message */
+ byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES);
+
status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg,
sizeof(join_msg), node, &retval);
if (status < 0 && status != -ENOPROTOOPT) {