summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-05-26 11:39:36 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-05-26 11:39:36 -0700
commit6f59de9bc0d576eb5a5edfea470527902315e924 (patch)
treeeeb477da0caa8e9569074f448169fb792fac90d9 /include
parent3e406741b19890c3d8a2ed126aa7c23b106ca9e1 (diff)
parent533c87e2ed742454957f14d7bef9f48d5a72e72d (diff)
Merge tag 'for-6.16/block-20250523' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe: - ublk updates: - Add support for updating the size of a ublk instance - Zero-copy improvements - Auto-registering of buffers for zero-copy - Series simplifying and improving GET_DATA and request lookup - Series adding quiesce support - Lots of selftests additions - Various cleanups - NVMe updates via Christoph: - add per-node DMA pools and use them for PRP/SGL allocations (Caleb Sander Mateos, Keith Busch) - nvme-fcloop refcounting fixes (Daniel Wagner) - support delayed removal of the multipath node and optionally support the multipath node for private namespaces (Nilay Shroff) - support shared CQs in the PCI endpoint target code (Wilfred Mallawa) - support admin-queue only authentication (Hannes Reinecke) - use the crc32c library instead of the crypto API (Eric Biggers) - misc cleanups (Christoph Hellwig, Marcelo Moreira, Hannes Reinecke, Leon Romanovsky, Gustavo A. R. Silva) - MD updates via Yu: - Fix that normal IO can be starved by sync IO, found by mkfs on newly created large raid5, with some clean up patches for bdev inflight counters - Clean up brd, getting rid of atomic kmaps and bvec poking - Add loop driver specifically for zoned IO testing - Eliminate blk-rq-qos calls with a static key, if not enabled - Improve hctx locking for when a plug has IO for multiple queues pending - Remove block layer bouncing support, which in turn means we can remove the per-node bounce stat as well - Improve blk-throttle support - Improve delay support for blk-throttle - Improve brd discard support - Unify IO scheduler switching. This should also fix a bunch of lockdep warnings we've been seeing, after enabling lockdep support for queue freezing/unfreezeing - Add support for block write streams via FDP (flexible data placement) on NVMe - Add a bunch of block helpers, facilitating the removal of a bunch of duplicated boilerplate code - Remove obsolete BLK_MQ pci and virtio Kconfig options - Add atomic/untorn write support to blktrace - Various little cleanups and fixes * tag 'for-6.16/block-20250523' of git://git.kernel.dk/linux: (186 commits) selftests: ublk: add test for UBLK_F_QUIESCE ublk: add feature UBLK_F_QUIESCE selftests: ublk: add test case for UBLK_U_CMD_UPDATE_SIZE traceevent/block: Add REQ_ATOMIC flag to block trace events ublk: run auto buf unregisgering in same io_ring_ctx with registering io_uring: add helper io_uring_cmd_ctx_handle() ublk: remove io argument from ublk_auto_buf_reg_fallback() ublk: handle ublk_set_auto_buf_reg() failure correctly in ublk_fetch() selftests: ublk: add test for covering UBLK_AUTO_BUF_REG_FALLBACK selftests: ublk: support UBLK_F_AUTO_BUF_REG ublk: support UBLK_AUTO_BUF_REG_FALLBACK ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG ublk: prepare for supporting to register request buffer automatically ublk: convert to refcount_t selftests: ublk: make IO & device removal test more stressful nvme: rename nvme_mpath_shutdown_disk to nvme_mpath_remove_disk nvme: introduce multipath_always_on module param nvme-multipath: introduce delayed removal of the multipath head node nvme-pci: derive and better document max segments limits nvme-pci: use struct_size for allocation struct nvme_dev ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/bio.h25
-rw-r--r--include/linux/blk-mq.h10
-rw-r--r--include/linux/blk_types.h10
-rw-r--r--include/linux/blkdev.h24
-rw-r--r--include/linux/dmapool.h21
-rw-r--r--include/linux/fs.h1
-rw-r--r--include/linux/io_uring/cmd.h9
-rw-r--r--include/linux/mmzone.h1
-rw-r--r--include/linux/nvme.h77
-rw-r--r--include/linux/part_stat.h2
-rw-r--r--include/scsi/scsi_host.h2
-rw-r--r--include/trace/events/block.h17
-rw-r--r--include/uapi/linux/blktrace_api.h2
-rw-r--r--include/uapi/linux/io_uring.h4
-rw-r--r--include/uapi/linux/ublk_cmd.h128
15 files changed, 299 insertions, 34 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b786ec5bcc81..9c37c66ef9ca 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -403,7 +403,6 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs)
struct request_queue;
-extern int submit_bio_wait(struct bio *bio);
void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
unsigned short max_vecs, blk_opf_t opf);
extern void bio_uninit(struct bio *);
@@ -418,6 +417,30 @@ void __bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off);
void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
size_t off);
+void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len);
+
+/**
+ * bio_add_max_vecs - number of bio_vecs needed to add data to a bio
+ * @kaddr: kernel virtual address to add
+ * @len: length in bytes to add
+ *
+ * Calculate how many bio_vecs need to be allocated to add the kernel virtual
+ * address range in [@kaddr:@len] in the worse case.
+ */
+static inline unsigned int bio_add_max_vecs(void *kaddr, unsigned int len)
+{
+ if (is_vmalloc_addr(kaddr))
+ return DIV_ROUND_UP(offset_in_page(kaddr) + len, PAGE_SIZE);
+ return 1;
+}
+
+unsigned int bio_add_vmalloc_chunk(struct bio *bio, void *vaddr, unsigned len);
+bool bio_add_vmalloc(struct bio *bio, void *vaddr, unsigned int len);
+
+int submit_bio_wait(struct bio *bio);
+int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data,
+ size_t len, enum req_op op);
+
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter);
void __bio_release_pages(struct bio *bio, bool mark_dirty);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8eb9b3310167..de8c85a03bb7 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -9,6 +9,7 @@
#include <linux/prefetch.h>
#include <linux/srcu.h>
#include <linux/rw_hint.h>
+#include <linux/rwsem.h>
struct blk_mq_tags;
struct blk_flush_queue;
@@ -506,6 +507,9 @@ enum hctx_type {
* request_queue.tag_set_list.
* @srcu: Use as lock when type of the request queue is blocking
* (BLK_MQ_F_BLOCKING).
+ * @update_nr_hwq_lock:
+ * Synchronize updating nr_hw_queues with add/del disk &
+ * switching elevator.
*/
struct blk_mq_tag_set {
const struct blk_mq_ops *ops;
@@ -527,6 +531,8 @@ struct blk_mq_tag_set {
struct mutex tag_list_lock;
struct list_head tag_list;
struct srcu_struct *srcu;
+
+ struct rw_semaphore update_nr_hwq_lock;
};
/**
@@ -1031,8 +1037,8 @@ int blk_rq_map_user_io(struct request *, struct rq_map_data *,
int blk_rq_map_user_iov(struct request_queue *, struct request *,
struct rq_map_data *, const struct iov_iter *, gfp_t);
int blk_rq_unmap_user(struct bio *);
-int blk_rq_map_kern(struct request_queue *, struct request *, void *,
- unsigned int, gfp_t);
+int blk_rq_map_kern(struct request *rq, void *kbuf, unsigned int len,
+ gfp_t gfp);
int blk_rq_append_bio(struct request *rq, struct bio *bio);
void blk_execute_rq_nowait(struct request *rq, bool at_head);
blk_status_t blk_execute_rq(struct request *rq, bool at_head);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index dce7615c35e7..3d1577f07c1c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -220,6 +220,7 @@ struct bio {
unsigned short bi_flags; /* BIO_* below */
unsigned short bi_ioprio;
enum rw_hint bi_write_hint;
+ u8 bi_write_stream;
blk_status_t bi_status;
atomic_t __bi_remaining;
@@ -286,7 +287,6 @@ struct bio {
enum {
BIO_PAGE_PINNED, /* Unpin pages in bio_release_pages() */
BIO_CLONED, /* doesn't own data */
- BIO_BOUNCED, /* bio is a bounce bio */
BIO_QUIET, /* Make BIO Quiet */
BIO_CHAIN, /* chained bio, ->bi_remaining in effect */
BIO_REFFED, /* bio has elevated ->bi_cnt */
@@ -296,6 +296,14 @@ enum {
* of this bio. */
BIO_CGROUP_ACCT, /* has been accounted to a cgroup */
BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */
+ /*
+ * This bio has completed bps throttling at the single tg granularity,
+ * which is different from BIO_BPS_THROTTLED. When the bio is enqueued
+ * into the sq->queued of the upper tg, or is about to be dispatched,
+ * this flag needs to be cleared. Since blk-throttle and rq_qos are not
+ * on the same hierarchical level, reuse the value.
+ */
+ BIO_TG_BPS_THROTTLED = BIO_QOS_THROTTLED,
BIO_QOS_MERGED, /* but went through rq_qos merge path */
BIO_REMAPPED,
BIO_ZONE_WRITE_PLUGGING, /* bio handled through zone write plugging */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9a1f0ee40b56..332b56f323d9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -182,7 +182,6 @@ struct gendisk {
struct list_head slave_bdevs;
#endif
struct timer_rand_state *random;
- atomic_t sync_io; /* RAID */
struct disk_events *ev;
#ifdef CONFIG_BLK_DEV_ZONED
@@ -218,6 +217,8 @@ struct gendisk {
* devices that do not have multiple independent access ranges.
*/
struct blk_independent_access_ranges *ia_ranges;
+
+ struct mutex rqos_state_mutex; /* rqos state change mutex */
};
/**
@@ -331,9 +332,6 @@ typedef unsigned int __bitwise blk_features_t;
/* skip this queue in blk_mq_(un)quiesce_tagset */
#define BLK_FEAT_SKIP_TAGSET_QUIESCE ((__force blk_features_t)(1u << 13))
-/* bounce all highmem pages */
-#define BLK_FEAT_BOUNCE_HIGH ((__force blk_features_t)(1u << 14))
-
/* undocumented magic for bcache */
#define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \
((__force blk_features_t)(1u << 15))
@@ -347,7 +345,7 @@ typedef unsigned int __bitwise blk_features_t;
*/
#define BLK_FEAT_INHERIT_MASK \
(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \
- BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH | \
+ BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | \
BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE)
/* internal flags in queue_limits.flags */
@@ -405,6 +403,9 @@ struct queue_limits {
unsigned short max_integrity_segments;
unsigned short max_discard_segments;
+ unsigned short max_write_streams;
+ unsigned int write_stream_granularity;
+
unsigned int max_open_zones;
unsigned int max_active_zones;
@@ -644,6 +645,8 @@ enum {
QUEUE_FLAG_RQ_ALLOC_TIME, /* record rq->alloc_time_ns */
QUEUE_FLAG_HCTX_ACTIVE, /* at least one blk-mq hctx is active */
QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */
+ QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */
+ QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */
QUEUE_FLAG_MAX
};
@@ -679,6 +682,10 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
#define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags)
#define blk_queue_skip_tagset_quiesce(q) \
((q)->limits.features & BLK_FEAT_SKIP_TAGSET_QUIESCE)
+#define blk_queue_disable_wbt(q) \
+ test_bit(QUEUE_FLAG_DISABLE_WBT_DEF, &(q)->queue_flags)
+#define blk_queue_no_elv_switch(q) \
+ test_bit(QUEUE_FLAG_NO_ELV_SWITCH, &(q)->queue_flags)
extern void blk_set_pm_only(struct request_queue *q);
extern void blk_clear_pm_only(struct request_queue *q);
@@ -1288,6 +1295,13 @@ static inline unsigned int bdev_max_segments(struct block_device *bdev)
return queue_max_segments(bdev_get_queue(bdev));
}
+static inline unsigned short bdev_max_write_streams(struct block_device *bdev)
+{
+ if (bdev_is_partition(bdev))
+ return 0;
+ return bdev_limits(bdev)->max_write_streams;
+}
+
static inline unsigned queue_logical_block_size(const struct request_queue *q)
{
return q->limits.logical_block_size;
diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h
index f632ecfb4238..06c4de602b2f 100644
--- a/include/linux/dmapool.h
+++ b/include/linux/dmapool.h
@@ -11,6 +11,7 @@
#ifndef LINUX_DMAPOOL_H
#define LINUX_DMAPOOL_H
+#include <linux/nodemask_types.h>
#include <linux/scatterlist.h>
#include <asm/io.h>
@@ -18,8 +19,8 @@ struct device;
#ifdef CONFIG_HAS_DMA
-struct dma_pool *dma_pool_create(const char *name, struct device *dev,
- size_t size, size_t align, size_t allocation);
+struct dma_pool *dma_pool_create_node(const char *name, struct device *dev,
+ size_t size, size_t align, size_t boundary, int node);
void dma_pool_destroy(struct dma_pool *pool);
@@ -35,9 +36,12 @@ struct dma_pool *dmam_pool_create(const char *name, struct device *dev,
void dmam_pool_destroy(struct dma_pool *pool);
#else /* !CONFIG_HAS_DMA */
-static inline struct dma_pool *dma_pool_create(const char *name,
- struct device *dev, size_t size, size_t align, size_t allocation)
-{ return NULL; }
+static inline struct dma_pool *dma_pool_create_node(const char *name,
+ struct device *dev, size_t size, size_t align, size_t boundary,
+ int node)
+{
+ return NULL;
+}
static inline void dma_pool_destroy(struct dma_pool *pool) { }
static inline void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
dma_addr_t *handle) { return NULL; }
@@ -49,6 +53,13 @@ static inline struct dma_pool *dmam_pool_create(const char *name,
static inline void dmam_pool_destroy(struct dma_pool *pool) { }
#endif /* !CONFIG_HAS_DMA */
+static inline struct dma_pool *dma_pool_create(const char *name,
+ struct device *dev, size_t size, size_t align, size_t boundary)
+{
+ return dma_pool_create_node(name, dev, size, align, boundary,
+ NUMA_NO_NODE);
+}
+
static inline void *dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags,
dma_addr_t *handle)
{
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 81f8f0aec563..b807045614c6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -408,6 +408,7 @@ struct kiocb {
void *private;
int ki_flags;
u16 ki_ioprio; /* See linux/ioprio.h */
+ u8 ki_write_stream;
union {
/*
* Only used for async buffered reads, where it denotes the
diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index 0634a3de1782..53408124c1e5 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -140,6 +140,15 @@ static inline struct io_uring_cmd_data *io_uring_cmd_get_async_data(struct io_ur
return cmd_to_io_kiocb(cmd)->async_data;
}
+/*
+ * Return uring_cmd's context reference as its context handle for driver to
+ * track per-context resource, such as registered kernel IO buffer
+ */
+static inline void *io_uring_cmd_ctx_handle(struct io_uring_cmd *cmd)
+{
+ return cmd_to_io_kiocb(cmd)->ctx;
+}
+
int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
void (*release)(void *), unsigned int index,
unsigned int issue_flags);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6ccec1bf2896..b1c459f7a485 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -148,7 +148,6 @@ enum zone_stat_item {
NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
/* Second 128 byte cacheline */
- NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
NR_ZSPAGES, /* allocated in zsmalloc */
#endif
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 2479ed10f53e..51308f65b72f 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -303,6 +303,7 @@ enum nvme_ctrl_attr {
NVME_CTRL_ATTR_TBKAS = (1 << 6),
NVME_CTRL_ATTR_ELBAS = (1 << 15),
NVME_CTRL_ATTR_RHII = (1 << 18),
+ NVME_CTRL_ATTR_FDPS = (1 << 19),
};
struct nvme_id_ctrl {
@@ -689,6 +690,44 @@ struct nvme_rotational_media_log {
__u8 rsvd24[488];
};
+struct nvme_fdp_config {
+ __u8 flags;
+#define FDPCFG_FDPE (1U << 0)
+ __u8 fdpcidx;
+ __le16 reserved;
+};
+
+struct nvme_fdp_ruh_desc {
+ __u8 ruht;
+ __u8 reserved[3];
+};
+
+struct nvme_fdp_config_desc {
+ __le16 dsze;
+ __u8 fdpa;
+ __u8 vss;
+ __le32 nrg;
+ __le16 nruh;
+ __le16 maxpids;
+ __le32 nns;
+ __le64 runs;
+ __le32 erutl;
+ __u8 rsvd28[36];
+ struct nvme_fdp_ruh_desc ruhs[];
+};
+
+struct nvme_fdp_config_log {
+ __le16 numfdpc;
+ __u8 ver;
+ __u8 rsvd3;
+ __le32 sze;
+ __u8 rsvd8[8];
+ /*
+ * This is followed by variable number of nvme_fdp_config_desc
+ * structures, but sparse doesn't like nested variable sized arrays.
+ */
+};
+
struct nvme_smart_log {
__u8 critical_warning;
__u8 temperature[2];
@@ -915,6 +954,7 @@ enum nvme_opcode {
nvme_cmd_resv_register = 0x0d,
nvme_cmd_resv_report = 0x0e,
nvme_cmd_resv_acquire = 0x11,
+ nvme_cmd_io_mgmt_recv = 0x12,
nvme_cmd_resv_release = 0x15,
nvme_cmd_zone_mgmt_send = 0x79,
nvme_cmd_zone_mgmt_recv = 0x7a,
@@ -936,6 +976,7 @@ enum nvme_opcode {
nvme_opcode_name(nvme_cmd_resv_register), \
nvme_opcode_name(nvme_cmd_resv_report), \
nvme_opcode_name(nvme_cmd_resv_acquire), \
+ nvme_opcode_name(nvme_cmd_io_mgmt_recv), \
nvme_opcode_name(nvme_cmd_resv_release), \
nvme_opcode_name(nvme_cmd_zone_mgmt_send), \
nvme_opcode_name(nvme_cmd_zone_mgmt_recv), \
@@ -1087,6 +1128,7 @@ enum {
NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12,
NVME_RW_PRINFO_PRACT = 1 << 13,
NVME_RW_DTYPE_STREAMS = 1 << 4,
+ NVME_RW_DTYPE_DPLCMT = 2 << 4,
NVME_WZ_DEAC = 1 << 9,
};
@@ -1174,6 +1216,38 @@ struct nvme_zone_mgmt_recv_cmd {
__le32 cdw14[2];
};
+struct nvme_io_mgmt_recv_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __le64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __u8 mo;
+ __u8 rsvd11;
+ __u16 mos;
+ __le32 numd;
+ __le32 cdw12[4];
+};
+
+enum {
+ NVME_IO_MGMT_RECV_MO_RUHS = 1,
+};
+
+struct nvme_fdp_ruh_status_desc {
+ __le16 pid;
+ __le16 ruhid;
+ __le32 earutr;
+ __le64 ruamw;
+ __u8 reserved[16];
+};
+
+struct nvme_fdp_ruh_status {
+ __u8 rsvd0[14];
+ __le16 nruhsd;
+ struct nvme_fdp_ruh_status_desc ruhsd[];
+};
+
enum {
NVME_ZRA_ZONE_REPORT = 0,
NVME_ZRASF_ZONE_REPORT_ALL = 0,
@@ -1309,6 +1383,7 @@ enum {
NVME_FEAT_PLM_WINDOW = 0x14,
NVME_FEAT_HOST_BEHAVIOR = 0x16,
NVME_FEAT_SANITIZE = 0x17,
+ NVME_FEAT_FDP = 0x1d,
NVME_FEAT_SW_PROGRESS = 0x80,
NVME_FEAT_HOST_ID = 0x81,
NVME_FEAT_RESV_MASK = 0x82,
@@ -1329,6 +1404,7 @@ enum {
NVME_LOG_ANA = 0x0c,
NVME_LOG_FEATURES = 0x12,
NVME_LOG_RMI = 0x16,
+ NVME_LOG_FDP_CONFIGS = 0x20,
NVME_LOG_DISC = 0x70,
NVME_LOG_RESERVATION = 0x80,
NVME_FWACT_REPL = (0 << 3),
@@ -1923,6 +1999,7 @@ struct nvme_command {
struct nvmf_auth_receive_command auth_receive;
struct nvme_dbbuf dbbuf;
struct nvme_directive_cmd directive;
+ struct nvme_io_mgmt_recv_cmd imr;
};
};
diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
index c5e9cac0575e..eeeff2a04529 100644
--- a/include/linux/part_stat.h
+++ b/include/linux/part_stat.h
@@ -79,4 +79,6 @@ static inline void part_stat_set_all(struct block_device *part, int value)
#define part_stat_local_read_cpu(part, field, cpu) \
local_read(&(part_stat_get_cpu(part, field, cpu)))
+unsigned int bdev_count_inflight(struct block_device *part);
+
#endif /* _LINUX_PART_STAT_H */
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 26bc23419cfd..c53812b9026f 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -670,8 +670,6 @@ struct Scsi_Host {
/* The transport requires the LUN bits NOT to be stored in CDB[1] */
unsigned no_scsi2_lun_in_cdb:1;
- unsigned no_highmem:1;
-
/*
* Optional work queue to be utilized by the transport
*/
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index bd0ea07338eb..14a924c0e303 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -11,7 +11,7 @@
#include <linux/tracepoint.h>
#include <uapi/linux/ioprio.h>
-#define RWBS_LEN 8
+#define RWBS_LEN 9
#define IOPRIO_CLASS_STRINGS \
{ IOPRIO_CLASS_NONE, "none" }, \
@@ -361,21 +361,6 @@ DECLARE_EVENT_CLASS(block_bio,
);
/**
- * block_bio_bounce - used bounce buffer when processing block operation
- * @bio: block operation
- *
- * A bounce buffer was used to handle the block operation @bio in @q.
- * This occurs when hardware limitations prevent a direct transfer of
- * data between the @bio data memory area and the IO device. Use of a
- * bounce buffer requires extra copying of data and decreases
- * performance.
- */
-DEFINE_EVENT(block_bio, block_bio_bounce,
- TP_PROTO(struct bio *bio),
- TP_ARGS(bio)
-);
-
-/**
* block_bio_backmerge - merging block operation to the end of an existing operation
* @bio: new block operation to merge
*
diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h
index 690621b610e5..1bfb635e309b 100644
--- a/include/uapi/linux/blktrace_api.h
+++ b/include/uapi/linux/blktrace_api.h
@@ -49,7 +49,7 @@ enum blktrace_act {
__BLK_TA_UNPLUG_TIMER, /* queue was unplugged by timer */
__BLK_TA_INSERT, /* insert request */
__BLK_TA_SPLIT, /* bio was split */
- __BLK_TA_BOUNCE, /* bio was bounced */
+ __BLK_TA_BOUNCE, /* unused, was: bio was bounced */
__BLK_TA_REMAP, /* bio was remapped */
__BLK_TA_ABORT, /* request aborted */
__BLK_TA_DRV_DATA, /* driver-specific binary data */
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 8f1fc12bac46..50e372ea97c5 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -93,6 +93,10 @@ struct io_uring_sqe {
__u16 addr_len;
__u16 __pad3[1];
};
+ struct {
+ __u8 write_stream;
+ __u8 __pad4[3];
+ };
};
union {
struct {
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index 583b86681c93..56c7e3fc666f 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -51,6 +51,10 @@
_IOR('u', 0x13, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_DEL_DEV_ASYNC \
_IOR('u', 0x14, struct ublksrv_ctrl_cmd)
+#define UBLK_U_CMD_UPDATE_SIZE \
+ _IOWR('u', 0x15, struct ublksrv_ctrl_cmd)
+#define UBLK_U_CMD_QUIESCE_DEV \
+ _IOWR('u', 0x16, struct ublksrv_ctrl_cmd)
/*
* 64bits are enough now, and it should be easy to extend in case of
@@ -211,6 +215,63 @@
*/
#define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9)
+/*
+ * Resizing a block device is possible with UBLK_U_CMD_UPDATE_SIZE
+ * New size is passed in cmd->data[0] and is in units of sectors
+ */
+#define UBLK_F_UPDATE_SIZE (1ULL << 10)
+
+/*
+ * request buffer is registered automatically to uring_cmd's io_uring
+ * context before delivering this io command to ublk server, meantime
+ * it is un-registered automatically when completing this io command.
+ *
+ * For using this feature:
+ *
+ * - ublk server has to create sparse buffer table on the same `io_ring_ctx`
+ * for issuing `UBLK_IO_FETCH_REQ` and `UBLK_IO_COMMIT_AND_FETCH_REQ`.
+ * If uring_cmd isn't issued on same `io_ring_ctx`, it is ublk server's
+ * responsibility to unregister the buffer by issuing `IO_UNREGISTER_IO_BUF`
+ * manually, otherwise this ublk request won't complete.
+ *
+ * - ublk server passes auto buf register data via uring_cmd's sqe->addr,
+ * `struct ublk_auto_buf_reg` is populated from sqe->addr, please see
+ * the definition of ublk_sqe_addr_to_auto_buf_reg()
+ *
+ * - pass buffer index from `ublk_auto_buf_reg.index`
+ *
+ * - all reserved fields in `ublk_auto_buf_reg` need to be zeroed
+ *
+ * - pass flags from `ublk_auto_buf_reg.flags` if needed
+ *
+ * This way avoids extra cost from two uring_cmd, but also simplifies backend
+ * implementation, such as, the dependency on IO_REGISTER_IO_BUF and
+ * IO_UNREGISTER_IO_BUF becomes not necessary.
+ *
+ * If wrong data or flags are provided, both IO_FETCH_REQ and
+ * IO_COMMIT_AND_FETCH_REQ are failed, for the latter, the ublk IO request
+ * won't be completed until new IO_COMMIT_AND_FETCH_REQ command is issued
+ * successfully
+ */
+#define UBLK_F_AUTO_BUF_REG (1ULL << 11)
+
+/*
+ * Control command `UBLK_U_CMD_QUIESCE_DEV` is added for quiescing device,
+ * which state can be transitioned to `UBLK_S_DEV_QUIESCED` or
+ * `UBLK_S_DEV_FAIL_IO` finally, and it needs ublk server cooperation for
+ * handling `UBLK_IO_RES_ABORT` correctly.
+ *
+ * Typical use case is for supporting to upgrade ublk server application,
+ * meantime keep ublk block device persistent during the period.
+ *
+ * This feature is only available when UBLK_F_USER_RECOVERY is enabled.
+ *
+ * Note, this command returns -EBUSY in case that all IO commands are being
+ * handled by ublk server and not completed in specified time period which
+ * is passed from the control command parameter.
+ */
+#define UBLK_F_QUIESCE (1ULL << 12)
+
/* device state */
#define UBLK_S_DEV_DEAD 0
#define UBLK_S_DEV_LIVE 1
@@ -297,6 +358,17 @@ struct ublksrv_ctrl_dev_info {
#define UBLK_IO_F_FUA (1U << 13)
#define UBLK_IO_F_NOUNMAP (1U << 15)
#define UBLK_IO_F_SWAP (1U << 16)
+/*
+ * For UBLK_F_AUTO_BUF_REG & UBLK_AUTO_BUF_REG_FALLBACK only.
+ *
+ * This flag is set if auto buffer register is failed & ublk server passes
+ * UBLK_AUTO_BUF_REG_FALLBACK, and ublk server need to register buffer
+ * manually for handling the delivered IO command if this flag is observed
+ *
+ * ublk server has to check this flag if UBLK_AUTO_BUF_REG_FALLBACK is
+ * passed in.
+ */
+#define UBLK_IO_F_NEED_REG_BUF (1U << 17)
/*
* io cmd is described by this structure, and stored in share memory, indexed
@@ -331,6 +403,62 @@ static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod)
return iod->op_flags >> 8;
}
+/*
+ * If this flag is set, fallback by completing the uring_cmd and setting
+ * `UBLK_IO_F_NEED_REG_BUF` in case of auto-buf-register failure;
+ * otherwise the client ublk request is failed silently
+ *
+ * If ublk server passes this flag, it has to check if UBLK_IO_F_NEED_REG_BUF
+ * is set in `ublksrv_io_desc.op_flags`. If UBLK_IO_F_NEED_REG_BUF is set,
+ * ublk server needs to register io buffer manually for handling IO command.
+ */
+#define UBLK_AUTO_BUF_REG_FALLBACK (1 << 0)
+#define UBLK_AUTO_BUF_REG_F_MASK UBLK_AUTO_BUF_REG_FALLBACK
+
+struct ublk_auto_buf_reg {
+ /* index for registering the delivered request buffer */
+ __u16 index;
+ __u8 flags;
+ __u8 reserved0;
+
+ /*
+ * io_ring FD can be passed via the reserve field in future for
+ * supporting to register io buffer to external io_uring
+ */
+ __u32 reserved1;
+};
+
+/*
+ * For UBLK_F_AUTO_BUF_REG, auto buffer register data is carried via
+ * uring_cmd's sqe->addr:
+ *
+ * - bit0 ~ bit15: buffer index
+ * - bit16 ~ bit23: flags
+ * - bit24 ~ bit31: reserved0
+ * - bit32 ~ bit63: reserved1
+ */
+static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg(
+ __u64 sqe_addr)
+{
+ struct ublk_auto_buf_reg reg = {
+ .index = sqe_addr & 0xffff,
+ .flags = (sqe_addr >> 16) & 0xff,
+ .reserved0 = (sqe_addr >> 24) & 0xff,
+ .reserved1 = sqe_addr >> 32,
+ };
+
+ return reg;
+}
+
+static inline __u64
+ublk_auto_buf_reg_to_sqe_addr(const struct ublk_auto_buf_reg *buf)
+{
+ __u64 addr = buf->index | (__u64)buf->flags << 16 | (__u64)buf->reserved0 << 24 |
+ (__u64)buf->reserved1 << 32;
+
+ return addr;
+}
+
/* issued to ublk driver via /dev/ublkcN */
struct ublksrv_io_cmd {
__u16 q_id;