summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Alexander Aring <aahringo@redhat.com> 2025-07-23 11:21:56 -0400
committer: David Teigland <teigland@redhat.com> 2025-08-12 11:37:21 -0500
commit: 6f4f4ca5caf73de5e86329547d4527b3e0c08488 (patch)
tree: 5843d3994f8173d87c853de702d0a3882fd33c21
parent: de7b4869b4ecf5790b0e7875c5522d43d7a61d79 (diff)
dlm: add new flag DLM_RELEASE_RECOVER for dlm_release_lockspace
When dlm_release_lockspace() is passed DLM_RELEASE_RECOVER, it tells the dlm to handle the release/leave as if the node had failed, i.e. perform recovery steps for a failed node, like recover_slot().

When DLM_RELEASE_RECOVER is set:
- dlm_release_lockspace() includes RELEASE_RECOVER=1 in the OFFLINE uevent sent to userspace.
- userspace/dlm_controld sends a message to all lockspace members indicating that the subsequent node removal should be handled as if the node had failed.
- when dlm_controld on all nodes receives the new message, it sets the release_recover configfs entry to 1 for the node.
- when the dlm/kernel next performs recovery and removes the node, it will see that release_recover has been set, and will perform recovery steps for the node as if it had failed, e.g. the recover_slot() callback is called to notify the fs.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
-rw-r--r--  fs/dlm/lockspace.c   |  2
-rw-r--r--  fs/dlm/member.c      | 14
-rw-r--r--  include/linux/dlm.h  |  5
3 files changed, 16 insertions, 5 deletions
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 6ff666a511c7..d986b7ef153d 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -738,7 +738,7 @@ static int release_lockspace(struct dlm_ls *ls, int release_option)
if (release_option != DLM_RELEASE_NO_EVENT &&
dlm_user_daemon_available())
- do_uevent(ls, 0, 0);
+ do_uevent(ls, 0, (release_option == DLM_RELEASE_RECOVER));
dlm_recoverd_stop(ls);
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 152d2cb16f59..356337102015 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -478,7 +478,8 @@ static void dlm_lsop_recover_prep(struct dlm_ls *ls)
ls->ls_ops->recover_prep(ls->ls_ops_arg);
}
-static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb)
+static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb,
+ unsigned int release_recover)
{
struct dlm_slot slot;
uint32_t seq;
@@ -495,7 +496,7 @@ static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb)
error = dlm_comm_seq(memb->nodeid, &seq, false);
- if (!error && seq == memb->comm_seq)
+ if (!release_recover && !error && seq == memb->comm_seq)
return;
slot.nodeid = memb->nodeid;
@@ -552,6 +553,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
struct dlm_member *memb, *safe;
struct dlm_config_node *node;
int i, error, neg = 0, low = -1;
+ unsigned int release_recover;
/* previously removed members that we've not finished removing need to
* count as a negative change so the "neg" recovery steps will happen
@@ -572,8 +574,12 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
if (node && !node->new && !node->gone)
continue;
+ release_recover = 0;
+
if (node->gone) {
- log_rinfo(ls, "remove member %d", memb->nodeid);
+ release_recover = node->release_recover;
+ log_rinfo(ls, "remove member %d%s", memb->nodeid,
+ release_recover ? " (release_recover)" : "");
} else {
/* removed and re-added */
log_rinfo(ls, "remove member %d comm_seq %u %u",
@@ -584,7 +590,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
list_move(&memb->list, &ls->ls_nodes_gone);
remove_remote_member(memb->nodeid);
ls->ls_num_nodes--;
- dlm_lsop_recover_slot(ls, memb);
+ dlm_lsop_recover_slot(ls, memb, release_recover);
}
/* add new members to ls_nodes */
diff --git a/include/linux/dlm.h b/include/linux/dlm.h
index cc7a36244893..108eb953eb18 100644
--- a/include/linux/dlm.h
+++ b/include/linux/dlm.h
@@ -103,11 +103,16 @@ int dlm_new_lockspace(const char *name, const char *cluster,
* a leave event to the cluster manager, so other nodes will
* not be notified that the node should be removed from the
* list of lockspace members.
+ *
+ * DLM_RELEASE_RECOVER like DLM_RELEASE_NORMAL, but the remaining
+ * nodes will handle the removal of the node as if the node
+ * had failed, e.g. the recover_slot() callback would be used.
*/
#define DLM_RELEASE_NO_LOCKS 0
#define DLM_RELEASE_UNUSED 1
#define DLM_RELEASE_NORMAL 2
#define DLM_RELEASE_NO_EVENT 3
+#define DLM_RELEASE_RECOVER 4
/*
* dlm_release_lockspace