summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
author: Martin K. Petersen <martin.petersen@oracle.com> 2023-05-22 16:35:02 -0400
committer: Martin K. Petersen <martin.petersen@oracle.com> 2023-05-22 16:35:02 -0400
commit: 7907ad748bdba8ac9ca47f0a650cc2e5d2ad6e24 (patch)
tree: 068ffd5248c8c988015fc751fe8b68dd51347943 /include/linux
parent: 16853cd8f6d44d774f683d670be38c7d91eb32b8 (diff)
parent: 394f811848827ad23d2b43e94e5d72a24cfbc39f (diff)
Merge patch series "Use block pr_ops in LIO"
Mike Christie <michael.christie@oracle.com> says: The patches in this thread allow us to use the block pr_ops with LIO's target_core_iblock module to support cluster applications in VMs. They were built over Linus's tree. They also apply over linux-next and over Martin's and Jens's trees. Currently, to use Windows clustering or Linux clustering (Pacemaker + ClusterLabs SCSI fence agents) in VMs with LIO and vhost-scsi, you have to use tcmu or pscsi, or use a cluster-aware FS/framework for the LIO pr file. Setting up a cluster FS/framework is a pain and a waste when your real backend device is already a distributed device, and pscsi and tcmu are nice for specific use cases, but iblock gives you the best performance and allows you to use stacked devices like dm-multipath. So these patches allow iblock to work like pscsi/tcmu, where they can pass a PR command to the backend module. And then iblock will use the pr_ops to pass the PR command to the real devices, similar to what we do for unmap today. The patches are separated into the following groups: Patch 1 - 2: - Add block layer callouts for reading reservations and rename reservation error code. Patch 3 - 5: - SCSI support for new callouts. Patch 6: - DM support for new callouts. Patch 7 - 13: - NVMe support for new callouts. Patch 14 - 18: - LIO support for new callouts. This patchset has been tested with the libiscsi PGR ops and with Windows' failover cluster verification test. Note that for SCSI backend devices we need this patchset: https://lore.kernel.org/linux-scsi/20230123221046.125483-1-michael.christie@oracle.com/T/#m4834a643ffb5bac2529d65d40906d3cfbdd9b1b7 to handle UAs. To reduce the size of this patchset, that's being done separately to make reviewing easier. And to make merging easier, this patchset and the one above do not have any conflicts, so they can be merged in different trees. Link: https://lore.kernel.org/r/20230407200551.12660-1-michael.christie@oracle.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Diffstat (limited to 'include/linux')
-rw-r--r-- include/linux/blk_types.h 4
-rw-r--r-- include/linux/nvme.h 51
-rw-r--r-- include/linux/pr.h 25
3 files changed, 70 insertions, 10 deletions
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 740afe80f297..936e898016f8 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -101,7 +101,7 @@ typedef u16 blk_short_t;
#define BLK_STS_NOSPC ((__force blk_status_t)3)
#define BLK_STS_TRANSPORT ((__force blk_status_t)4)
#define BLK_STS_TARGET ((__force blk_status_t)5)
-#define BLK_STS_NEXUS ((__force blk_status_t)6)
+#define BLK_STS_RESV_CONFLICT ((__force blk_status_t)6)
#define BLK_STS_MEDIUM ((__force blk_status_t)7)
#define BLK_STS_PROTECTION ((__force blk_status_t)8)
#define BLK_STS_RESOURCE ((__force blk_status_t)9)
@@ -189,7 +189,7 @@ static inline bool blk_path_error(blk_status_t error)
case BLK_STS_NOTSUPP:
case BLK_STS_NOSPC:
case BLK_STS_TARGET:
- case BLK_STS_NEXUS:
+ case BLK_STS_RESV_CONFLICT:
case BLK_STS_MEDIUM:
case BLK_STS_PROTECTION:
return false;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 779507ac750b..182b6d614eb1 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -759,20 +759,55 @@ enum {
NVME_LBART_ATTRIB_HIDE = 1 << 1,
};
+enum nvme_pr_type {
+ NVME_PR_WRITE_EXCLUSIVE = 1,
+ NVME_PR_EXCLUSIVE_ACCESS = 2,
+ NVME_PR_WRITE_EXCLUSIVE_REG_ONLY = 3,
+ NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY = 4,
+ NVME_PR_WRITE_EXCLUSIVE_ALL_REGS = 5,
+ NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS = 6,
+};
+
+enum nvme_eds {
+ NVME_EXTENDED_DATA_STRUCT = 0x1,
+};
+
+struct nvme_registered_ctrl {
+ __le16 cntlid;
+ __u8 rcsts;
+ __u8 rsvd3[5];
+ __le64 hostid;
+ __le64 rkey;
+};
+
struct nvme_reservation_status {
__le32 gen;
__u8 rtype;
__u8 regctl[2];
__u8 resv5[2];
__u8 ptpls;
- __u8 resv10[13];
- struct {
- __le16 cntlid;
- __u8 rcsts;
- __u8 resv3[5];
- __le64 hostid;
- __le64 rkey;
- } regctl_ds[];
+ __u8 resv10[14];
+ struct nvme_registered_ctrl regctl_ds[];
+};
+
+struct nvme_registered_ctrl_ext {
+ __le16 cntlid;
+ __u8 rcsts;
+ __u8 rsvd3[5];
+ __le64 rkey;
+ __u8 hostid[16];
+ __u8 rsvd32[32];
+};
+
+struct nvme_reservation_status_ext {
+ __le32 gen;
+ __u8 rtype;
+ __u8 regctl[2];
+ __u8 resv5[2];
+ __u8 ptpls;
+ __u8 resv10[14];
+ __u8 rsvd24[40];
+ struct nvme_registered_ctrl_ext regctl_eds[];
};
enum nvme_async_event_type {
diff --git a/include/linux/pr.h b/include/linux/pr.h
index 94ceec713afe..3003daec28a5 100644
--- a/include/linux/pr.h
+++ b/include/linux/pr.h
@@ -4,6 +4,18 @@
#include <uapi/linux/pr.h>
+struct pr_keys {
+ u32 generation;
+ u32 num_keys;
+ u64 keys[];
+};
+
+struct pr_held_reservation {
+ u64 key;
+ u32 generation;
+ enum pr_type type;
+};
+
struct pr_ops {
int (*pr_register)(struct block_device *bdev, u64 old_key, u64 new_key,
u32 flags);
@@ -14,6 +26,19 @@ struct pr_ops {
int (*pr_preempt)(struct block_device *bdev, u64 old_key, u64 new_key,
enum pr_type type, bool abort);
int (*pr_clear)(struct block_device *bdev, u64 key);
+ /*
+ * pr_read_keys - Read the registered keys and return them in the
+ * pr_keys->keys array. The keys array will have been allocated at the
+ * end of the pr_keys struct, and pr_keys->num_keys must be set to the
+ * number of keys the array can hold. If there are more than can fit
+ * in the array, success will still be returned and pr_keys->num_keys
+ * will reflect the total number of keys the device contains, so the
+ * caller can retry with a larger array.
+ */
+ int (*pr_read_keys)(struct block_device *bdev,
+ struct pr_keys *keys_info);
+ int (*pr_read_reservation)(struct block_device *bdev,
+ struct pr_held_reservation *rsv);
};
#endif /* LINUX_PR_H */