summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/bio.h25
-rw-r--r--include/linux/blk-mq.h10
-rw-r--r--include/linux/blk_types.h10
-rw-r--r--include/linux/blkdev.h24
-rw-r--r--include/linux/dmapool.h21
-rw-r--r--include/linux/fs.h1
-rw-r--r--include/linux/io_uring/cmd.h9
-rw-r--r--include/linux/mmzone.h1
-rw-r--r--include/linux/nvme.h77
-rw-r--r--include/linux/part_stat.h2
10 files changed, 165 insertions, 15 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b786ec5bcc81..9c37c66ef9ca 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -403,7 +403,6 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs)
struct request_queue;
-extern int submit_bio_wait(struct bio *bio);
void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
unsigned short max_vecs, blk_opf_t opf);
extern void bio_uninit(struct bio *);
@@ -418,6 +417,30 @@ void __bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off);
void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
size_t off);
+void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len);
+
+/**
+ * bio_add_max_vecs - number of bio_vecs needed to add data to a bio
+ * @kaddr: kernel virtual address to add
+ * @len: length in bytes to add
+ *
+ * Calculate how many bio_vecs need to be allocated to add the kernel virtual
+ * address range in [@kaddr:@len] in the worse case.
+ */
+static inline unsigned int bio_add_max_vecs(void *kaddr, unsigned int len)
+{
+ if (is_vmalloc_addr(kaddr))
+ return DIV_ROUND_UP(offset_in_page(kaddr) + len, PAGE_SIZE);
+ return 1;
+}
+
+unsigned int bio_add_vmalloc_chunk(struct bio *bio, void *vaddr, unsigned len);
+bool bio_add_vmalloc(struct bio *bio, void *vaddr, unsigned int len);
+
+int submit_bio_wait(struct bio *bio);
+int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data,
+ size_t len, enum req_op op);
+
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter);
void __bio_release_pages(struct bio *bio, bool mark_dirty);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8eb9b3310167..de8c85a03bb7 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -9,6 +9,7 @@
#include <linux/prefetch.h>
#include <linux/srcu.h>
#include <linux/rw_hint.h>
+#include <linux/rwsem.h>
struct blk_mq_tags;
struct blk_flush_queue;
@@ -506,6 +507,9 @@ enum hctx_type {
* request_queue.tag_set_list.
* @srcu: Use as lock when type of the request queue is blocking
* (BLK_MQ_F_BLOCKING).
+ * @update_nr_hwq_lock:
+ * Synchronize updating nr_hw_queues with add/del disk &
+ * switching elevator.
*/
struct blk_mq_tag_set {
const struct blk_mq_ops *ops;
@@ -527,6 +531,8 @@ struct blk_mq_tag_set {
struct mutex tag_list_lock;
struct list_head tag_list;
struct srcu_struct *srcu;
+
+ struct rw_semaphore update_nr_hwq_lock;
};
/**
@@ -1031,8 +1037,8 @@ int blk_rq_map_user_io(struct request *, struct rq_map_data *,
int blk_rq_map_user_iov(struct request_queue *, struct request *,
struct rq_map_data *, const struct iov_iter *, gfp_t);
int blk_rq_unmap_user(struct bio *);
-int blk_rq_map_kern(struct request_queue *, struct request *, void *,
- unsigned int, gfp_t);
+int blk_rq_map_kern(struct request *rq, void *kbuf, unsigned int len,
+ gfp_t gfp);
int blk_rq_append_bio(struct request *rq, struct bio *bio);
void blk_execute_rq_nowait(struct request *rq, bool at_head);
blk_status_t blk_execute_rq(struct request *rq, bool at_head);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index dce7615c35e7..3d1577f07c1c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -220,6 +220,7 @@ struct bio {
unsigned short bi_flags; /* BIO_* below */
unsigned short bi_ioprio;
enum rw_hint bi_write_hint;
+ u8 bi_write_stream;
blk_status_t bi_status;
atomic_t __bi_remaining;
@@ -286,7 +287,6 @@ struct bio {
enum {
BIO_PAGE_PINNED, /* Unpin pages in bio_release_pages() */
BIO_CLONED, /* doesn't own data */
- BIO_BOUNCED, /* bio is a bounce bio */
BIO_QUIET, /* Make BIO Quiet */
BIO_CHAIN, /* chained bio, ->bi_remaining in effect */
BIO_REFFED, /* bio has elevated ->bi_cnt */
@@ -296,6 +296,14 @@ enum {
* of this bio. */
BIO_CGROUP_ACCT, /* has been accounted to a cgroup */
BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */
+ /*
+ * This bio has completed bps throttling at the single tg granularity,
+ * which is different from BIO_BPS_THROTTLED. When the bio is enqueued
+ * into the sq->queued of the upper tg, or is about to be dispatched,
+ * this flag needs to be cleared. Since blk-throttle and rq_qos are not
+ * on the same hierarchical level, reuse the value.
+ */
+ BIO_TG_BPS_THROTTLED = BIO_QOS_THROTTLED,
BIO_QOS_MERGED, /* but went through rq_qos merge path */
BIO_REMAPPED,
BIO_ZONE_WRITE_PLUGGING, /* bio handled through zone write plugging */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9a1f0ee40b56..332b56f323d9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -182,7 +182,6 @@ struct gendisk {
struct list_head slave_bdevs;
#endif
struct timer_rand_state *random;
- atomic_t sync_io; /* RAID */
struct disk_events *ev;
#ifdef CONFIG_BLK_DEV_ZONED
@@ -218,6 +217,8 @@ struct gendisk {
* devices that do not have multiple independent access ranges.
*/
struct blk_independent_access_ranges *ia_ranges;
+
+ struct mutex rqos_state_mutex; /* rqos state change mutex */
};
/**
@@ -331,9 +332,6 @@ typedef unsigned int __bitwise blk_features_t;
/* skip this queue in blk_mq_(un)quiesce_tagset */
#define BLK_FEAT_SKIP_TAGSET_QUIESCE ((__force blk_features_t)(1u << 13))
-/* bounce all highmem pages */
-#define BLK_FEAT_BOUNCE_HIGH ((__force blk_features_t)(1u << 14))
-
/* undocumented magic for bcache */
#define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \
((__force blk_features_t)(1u << 15))
@@ -347,7 +345,7 @@ typedef unsigned int __bitwise blk_features_t;
*/
#define BLK_FEAT_INHERIT_MASK \
(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \
- BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH | \
+ BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | \
BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE)
/* internal flags in queue_limits.flags */
@@ -405,6 +403,9 @@ struct queue_limits {
unsigned short max_integrity_segments;
unsigned short max_discard_segments;
+ unsigned short max_write_streams;
+ unsigned int write_stream_granularity;
+
unsigned int max_open_zones;
unsigned int max_active_zones;
@@ -644,6 +645,8 @@ enum {
QUEUE_FLAG_RQ_ALLOC_TIME, /* record rq->alloc_time_ns */
QUEUE_FLAG_HCTX_ACTIVE, /* at least one blk-mq hctx is active */
QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */
+ QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */
+ QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */
QUEUE_FLAG_MAX
};
@@ -679,6 +682,10 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
#define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags)
#define blk_queue_skip_tagset_quiesce(q) \
((q)->limits.features & BLK_FEAT_SKIP_TAGSET_QUIESCE)
+#define blk_queue_disable_wbt(q) \
+ test_bit(QUEUE_FLAG_DISABLE_WBT_DEF, &(q)->queue_flags)
+#define blk_queue_no_elv_switch(q) \
+ test_bit(QUEUE_FLAG_NO_ELV_SWITCH, &(q)->queue_flags)
extern void blk_set_pm_only(struct request_queue *q);
extern void blk_clear_pm_only(struct request_queue *q);
@@ -1288,6 +1295,13 @@ static inline unsigned int bdev_max_segments(struct block_device *bdev)
return queue_max_segments(bdev_get_queue(bdev));
}
+static inline unsigned short bdev_max_write_streams(struct block_device *bdev)
+{
+ if (bdev_is_partition(bdev))
+ return 0;
+ return bdev_limits(bdev)->max_write_streams;
+}
+
static inline unsigned queue_logical_block_size(const struct request_queue *q)
{
return q->limits.logical_block_size;
diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h
index f632ecfb4238..06c4de602b2f 100644
--- a/include/linux/dmapool.h
+++ b/include/linux/dmapool.h
@@ -11,6 +11,7 @@
#ifndef LINUX_DMAPOOL_H
#define LINUX_DMAPOOL_H
+#include <linux/nodemask_types.h>
#include <linux/scatterlist.h>
#include <asm/io.h>
@@ -18,8 +19,8 @@ struct device;
#ifdef CONFIG_HAS_DMA
-struct dma_pool *dma_pool_create(const char *name, struct device *dev,
- size_t size, size_t align, size_t allocation);
+struct dma_pool *dma_pool_create_node(const char *name, struct device *dev,
+ size_t size, size_t align, size_t boundary, int node);
void dma_pool_destroy(struct dma_pool *pool);
@@ -35,9 +36,12 @@ struct dma_pool *dmam_pool_create(const char *name, struct device *dev,
void dmam_pool_destroy(struct dma_pool *pool);
#else /* !CONFIG_HAS_DMA */
-static inline struct dma_pool *dma_pool_create(const char *name,
- struct device *dev, size_t size, size_t align, size_t allocation)
-{ return NULL; }
+static inline struct dma_pool *dma_pool_create_node(const char *name,
+ struct device *dev, size_t size, size_t align, size_t boundary,
+ int node)
+{
+ return NULL;
+}
static inline void dma_pool_destroy(struct dma_pool *pool) { }
static inline void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
dma_addr_t *handle) { return NULL; }
@@ -49,6 +53,13 @@ static inline struct dma_pool *dmam_pool_create(const char *name,
static inline void dmam_pool_destroy(struct dma_pool *pool) { }
#endif /* !CONFIG_HAS_DMA */
+static inline struct dma_pool *dma_pool_create(const char *name,
+ struct device *dev, size_t size, size_t align, size_t boundary)
+{
+ return dma_pool_create_node(name, dev, size, align, boundary,
+ NUMA_NO_NODE);
+}
+
static inline void *dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags,
dma_addr_t *handle)
{
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 81f8f0aec563..b807045614c6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -408,6 +408,7 @@ struct kiocb {
void *private;
int ki_flags;
u16 ki_ioprio; /* See linux/ioprio.h */
+ u8 ki_write_stream;
union {
/*
* Only used for async buffered reads, where it denotes the
diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index 0634a3de1782..53408124c1e5 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -140,6 +140,15 @@ static inline struct io_uring_cmd_data *io_uring_cmd_get_async_data(struct io_ur
return cmd_to_io_kiocb(cmd)->async_data;
}
+/*
+ * Return uring_cmd's context reference as its context handle for driver to
+ * track per-context resource, such as registered kernel IO buffer
+ */
+static inline void *io_uring_cmd_ctx_handle(struct io_uring_cmd *cmd)
+{
+ return cmd_to_io_kiocb(cmd)->ctx;
+}
+
int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
void (*release)(void *), unsigned int index,
unsigned int issue_flags);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6ccec1bf2896..b1c459f7a485 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -148,7 +148,6 @@ enum zone_stat_item {
NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
/* Second 128 byte cacheline */
- NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
NR_ZSPAGES, /* allocated in zsmalloc */
#endif
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 2479ed10f53e..51308f65b72f 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -303,6 +303,7 @@ enum nvme_ctrl_attr {
NVME_CTRL_ATTR_TBKAS = (1 << 6),
NVME_CTRL_ATTR_ELBAS = (1 << 15),
NVME_CTRL_ATTR_RHII = (1 << 18),
+ NVME_CTRL_ATTR_FDPS = (1 << 19),
};
struct nvme_id_ctrl {
@@ -689,6 +690,44 @@ struct nvme_rotational_media_log {
__u8 rsvd24[488];
};
+struct nvme_fdp_config {
+ __u8 flags;
+#define FDPCFG_FDPE (1U << 0)
+ __u8 fdpcidx;
+ __le16 reserved;
+};
+
+struct nvme_fdp_ruh_desc {
+ __u8 ruht;
+ __u8 reserved[3];
+};
+
+struct nvme_fdp_config_desc {
+ __le16 dsze;
+ __u8 fdpa;
+ __u8 vss;
+ __le32 nrg;
+ __le16 nruh;
+ __le16 maxpids;
+ __le32 nns;
+ __le64 runs;
+ __le32 erutl;
+ __u8 rsvd28[36];
+ struct nvme_fdp_ruh_desc ruhs[];
+};
+
+struct nvme_fdp_config_log {
+ __le16 numfdpc;
+ __u8 ver;
+ __u8 rsvd3;
+ __le32 sze;
+ __u8 rsvd8[8];
+ /*
+ * This is followed by variable number of nvme_fdp_config_desc
+ * structures, but sparse doesn't like nested variable sized arrays.
+ */
+};
+
struct nvme_smart_log {
__u8 critical_warning;
__u8 temperature[2];
@@ -915,6 +954,7 @@ enum nvme_opcode {
nvme_cmd_resv_register = 0x0d,
nvme_cmd_resv_report = 0x0e,
nvme_cmd_resv_acquire = 0x11,
+ nvme_cmd_io_mgmt_recv = 0x12,
nvme_cmd_resv_release = 0x15,
nvme_cmd_zone_mgmt_send = 0x79,
nvme_cmd_zone_mgmt_recv = 0x7a,
@@ -936,6 +976,7 @@ enum nvme_opcode {
nvme_opcode_name(nvme_cmd_resv_register), \
nvme_opcode_name(nvme_cmd_resv_report), \
nvme_opcode_name(nvme_cmd_resv_acquire), \
+ nvme_opcode_name(nvme_cmd_io_mgmt_recv), \
nvme_opcode_name(nvme_cmd_resv_release), \
nvme_opcode_name(nvme_cmd_zone_mgmt_send), \
nvme_opcode_name(nvme_cmd_zone_mgmt_recv), \
@@ -1087,6 +1128,7 @@ enum {
NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12,
NVME_RW_PRINFO_PRACT = 1 << 13,
NVME_RW_DTYPE_STREAMS = 1 << 4,
+ NVME_RW_DTYPE_DPLCMT = 2 << 4,
NVME_WZ_DEAC = 1 << 9,
};
@@ -1174,6 +1216,38 @@ struct nvme_zone_mgmt_recv_cmd {
__le32 cdw14[2];
};
+struct nvme_io_mgmt_recv_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __le64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __u8 mo;
+ __u8 rsvd11;
+ __u16 mos;
+ __le32 numd;
+ __le32 cdw12[4];
+};
+
+enum {
+ NVME_IO_MGMT_RECV_MO_RUHS = 1,
+};
+
+struct nvme_fdp_ruh_status_desc {
+ __le16 pid;
+ __le16 ruhid;
+ __le32 earutr;
+ __le64 ruamw;
+ __u8 reserved[16];
+};
+
+struct nvme_fdp_ruh_status {
+ __u8 rsvd0[14];
+ __le16 nruhsd;
+ struct nvme_fdp_ruh_status_desc ruhsd[];
+};
+
enum {
NVME_ZRA_ZONE_REPORT = 0,
NVME_ZRASF_ZONE_REPORT_ALL = 0,
@@ -1309,6 +1383,7 @@ enum {
NVME_FEAT_PLM_WINDOW = 0x14,
NVME_FEAT_HOST_BEHAVIOR = 0x16,
NVME_FEAT_SANITIZE = 0x17,
+ NVME_FEAT_FDP = 0x1d,
NVME_FEAT_SW_PROGRESS = 0x80,
NVME_FEAT_HOST_ID = 0x81,
NVME_FEAT_RESV_MASK = 0x82,
@@ -1329,6 +1404,7 @@ enum {
NVME_LOG_ANA = 0x0c,
NVME_LOG_FEATURES = 0x12,
NVME_LOG_RMI = 0x16,
+ NVME_LOG_FDP_CONFIGS = 0x20,
NVME_LOG_DISC = 0x70,
NVME_LOG_RESERVATION = 0x80,
NVME_FWACT_REPL = (0 << 3),
@@ -1923,6 +1999,7 @@ struct nvme_command {
struct nvmf_auth_receive_command auth_receive;
struct nvme_dbbuf dbbuf;
struct nvme_directive_cmd directive;
+ struct nvme_io_mgmt_recv_cmd imr;
};
};
diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
index c5e9cac0575e..eeeff2a04529 100644
--- a/include/linux/part_stat.h
+++ b/include/linux/part_stat.h
@@ -79,4 +79,6 @@ static inline void part_stat_set_all(struct block_device *part, int value)
#define part_stat_local_read_cpu(part, field, cpu) \
local_read(&(part_stat_get_cpu(part, field, cpu)))
+unsigned int bdev_count_inflight(struct block_device *part);
+
#endif /* _LINUX_PART_STAT_H */