author	Linus Torvalds <torvalds@linux-foundation.org>	2025-10-02 10:16:56 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2025-10-02 10:16:56 -0700
commit	e1b1d03ceec343362524318c076b110066ffe305 (patch)
tree	ca71105f13d893118eab4feb826d20cca066c53d /include/linux
parent	5832d26433f2bd0d28f8b12526e3c2fdb203507f (diff)
parent	130e6de62107116eba124647116276266be0f84c (diff)
Merge tag 'for-6.18/block-20250929' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull block updates from Jens Axboe:

 - NVMe pull request via Keith:
      - FC target fixes (Daniel)
      - Authentication fixes and updates (Martin, Chris)
      - Admin controller handling (Kamaljit)
      - Target lockdep assertions (Max)
      - Keep-alive updates for discovery (Alastair)
      - Suspend quirk (Georg)

 - MD pull request via Yu:
      - Add support for a lockless bitmap. A key feature of the new
        bitmap is that the IO fastpath is lockless. If a user issues
        lots of write IO to the same bitmap bit in a short time, only
        the first write pays the overhead of updating the bitmap bit;
        the following writes add none. And because only written data
        is resynced or recovered, creating a new array or replacing a
        disk no longer requires a full-disk resync/recovery.

 - Switch ->getgeo() and ->bios_param() to using struct gendisk rather
   than struct block_device

 - Rust block changes via Andreas. This series adds configuration via
   configfs and remote completion to the rnull driver. It also includes
   a set of changes to the Rust block device driver API: a few cleanup
   patches, and a few features supporting the rnull changes. The series
   removes the raw buffer formatting logic from `kernel::block` and
   improves the logic available in `kernel::string` to support the same
   uses as the removed logic.

 - floppy arch cleanups

 - Reduce the number of dereferences needed for ublk commands

 - Restrict supported sockets for nbd, mostly to eliminate a class of
   issues perpetually reported by syzbot using nonsensical socket
   setups

 - A few s390 dasd block fixes

 - Fix a few issues around atomic writes

 - Improve DMA iteration for integrity requests

 - Improve how iovecs are treated with regard to O_DIRECT alignment
   constraints. We used to require each segment to adhere to the
   constraints; now only the request as a whole needs to (a loose
   sketch follows the commit list below)

 - Clean up and improve p2p support, enabling use of p2p for metadata
   payloads

 - Improve locking of request lookup, using SRCU where appropriate

 - Use page references properly for brd, avoiding very long RCU
   sections

 - Fix ordering of recursively submitted IOs

 - Clean up and improve updating nr_requests for a live device

 - Various fixes and cleanups

* tag 'for-6.18/block-20250929' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: (164 commits)
  s390/dasd: enforce dma_alignment to ensure proper buffer validation
  s390/dasd: Return BLK_STS_INVAL for EINVAL from do_dasd_request
  ublk: remove redundant zone op check in ublk_setup_iod()
  nvme: Use non zero KATO for persistent discovery connections
  nvmet: add safety check for subsys lock
  nvme-core: use nvme_is_io_ctrl() for I/O controller check
  nvme-core: do ioccsz/iorcsz validation only for I/O controllers
  nvme-core: add method to check for an I/O controller
  blk-cgroup: fix possible deadlock while configuring policy
  blk-mq: fix null-ptr-deref in blk_mq_free_tags() from error path
  blk-mq: Fix more tag iteration function documentation
  selftests: ublk: fix behavior when fio is not installed
  ublk: don't access ublk_queue in ublk_unmap_io()
  ublk: pass ublk_io to __ublk_complete_rq()
  ublk: don't access ublk_queue in ublk_need_complete_req()
  ublk: don't access ublk_queue in ublk_check_commit_and_fetch()
  ublk: don't pass ublk_queue to ublk_fetch()
  ublk: don't access ublk_queue in ublk_config_io_buf()
  ublk: don't access ublk_queue in ublk_check_fetch_buf()
  ublk: pass q_id and tag to __ublk_check_and_get_req()
  ...
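A loose sketch of the relaxed O_DIRECT alignment policy mentioned above, in plain C rather than kernel code; iovec_ex and both helpers are illustrative names only, not kernel APIs:

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdint.h>

	struct iovec_ex {
		void *base;
		size_t len;
	};

	/* Old policy: every segment must satisfy the address and length
	 * masks on its own. */
	static bool aligned_per_segment(const struct iovec_ex *iov, int n,
					uintptr_t addr_mask, size_t len_mask)
	{
		for (int i = 0; i < n; i++)
			if (((uintptr_t)iov[i].base & addr_mask) ||
			    (iov[i].len & len_mask))
				return false;
		return true;
	}

	/* New policy, roughly: only the request as a whole has to meet
	 * the logical-block-size length constraint. */
	static bool aligned_as_whole(const struct iovec_ex *iov, int n,
				     size_t len_mask)
	{
		size_t total = 0;

		for (int i = 0; i < n; i++)
			total += iov[i].len;
		return (total & len_mask) == 0;
	}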
Diffstat (limited to 'include/linux')
-rw-r--r--	include/linux/bio-integrity.h	1
-rw-r--r--	include/linux/bio.h	18
-rw-r--r--	include/linux/blk-integrity.h	32
-rw-r--r--	include/linux/blk-mq-dma.h	25
-rw-r--r--	include/linux/blk-mq.h	4
-rw-r--r--	include/linux/blk_types.h	19
-rw-r--r--	include/linux/blkdev.h	26
-rw-r--r--	include/linux/libata.h	2
-rw-r--r--	include/linux/uio.h	2
9 files changed, 98 insertions, 31 deletions
diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
index 0a25716820fe..851254f36eb3 100644
--- a/include/linux/bio-integrity.h
+++ b/include/linux/bio-integrity.h
@@ -13,6 +13,7 @@ enum bip_flags {
BIP_CHECK_GUARD = 1 << 5, /* guard check */
BIP_CHECK_REFTAG = 1 << 6, /* reftag check */
BIP_CHECK_APPTAG = 1 << 7, /* apptag check */
+ BIP_P2P_DMA = 1 << 8, /* using P2P address */
};
struct bio_integrity_payload {
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 46ffac5caab7..a64a30131031 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -322,8 +322,8 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio)
void bio_trim(struct bio *bio, sector_t offset, sector_t size);
extern struct bio *bio_split(struct bio *bio, int sectors,
gfp_t gfp, struct bio_set *bs);
-int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
- unsigned *segs, unsigned max_bytes);
+int bio_split_io_at(struct bio *bio, const struct queue_limits *lim,
+ unsigned *segs, unsigned max_bytes, unsigned len_align);
/**
* bio_next_split - get next @sectors from a bio, splitting if necessary
@@ -405,6 +405,11 @@ struct request_queue;
void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
unsigned short max_vecs, blk_opf_t opf);
+static inline void bio_init_inline(struct bio *bio, struct block_device *bdev,
+ unsigned short max_vecs, blk_opf_t opf)
+{
+ bio_init(bio, bdev, bio_inline_vecs(bio), max_vecs, opf);
+}
extern void bio_uninit(struct bio *);
void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf);
void bio_chain(struct bio *, struct bio *);
@@ -441,7 +446,14 @@ int submit_bio_wait(struct bio *bio);
int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data,
size_t len, enum req_op op);
-int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
+int bio_iov_iter_get_pages_aligned(struct bio *bio, struct iov_iter *iter,
+ unsigned len_align_mask);
+
+static inline int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+{
+ return bio_iov_iter_get_pages_aligned(bio, iter, 0);
+}
+
void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter);
void __bio_release_pages(struct bio *bio, bool mark_dirty);
extern void bio_set_pages_dirty(struct bio *bio);
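A minimal sketch of pairing the new bio_init_inline() with its allocation contract — the inline vectors are assumed to sit directly behind the bio in the same allocation, exactly where the bio_inline_vecs() helper in blk_types.h below computes them; alloc_bio_with_inline_vecs() is a hypothetical helper:

	#include <linux/bio.h>
	#include <linux/slab.h>

	static struct bio *alloc_bio_with_inline_vecs(struct block_device *bdev,
						      unsigned short nr_vecs,
						      blk_opf_t opf, gfp_t gfp)
	{
		/* One allocation: the bio immediately followed by nr_vecs
		 * inline bio_vecs, which bio_inline_vecs() finds at (bio + 1). */
		struct bio *bio = kmalloc(sizeof(*bio) +
					  nr_vecs * sizeof(struct bio_vec), gfp);

		if (!bio)
			return NULL;
		bio_init_inline(bio, bdev, nr_vecs, opf);
		return bio;
	}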
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index e67a2b6e8f11..b659373788f6 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -4,6 +4,7 @@
#include <linux/blk-mq.h>
#include <linux/bio-integrity.h>
+#include <linux/blk-mq-dma.h>
struct request;
@@ -26,11 +27,25 @@ static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t,
#ifdef CONFIG_BLK_DEV_INTEGRITY
int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
+
+static inline bool blk_rq_integrity_dma_unmap(struct request *req,
+ struct device *dma_dev, struct dma_iova_state *state,
+ size_t mapped_len)
+{
+ return blk_dma_unmap(req, dma_dev, state, mapped_len,
+ bio_integrity(req->bio)->bip_flags & BIP_P2P_DMA);
+}
+
int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
ssize_t bytes);
int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd,
struct logical_block_metadata_cap __user *argp);
+bool blk_rq_integrity_dma_map_iter_start(struct request *req,
+ struct device *dma_dev, struct dma_iova_state *state,
+ struct blk_dma_iter *iter);
+bool blk_rq_integrity_dma_map_iter_next(struct request *req,
+ struct device *dma_dev, struct blk_dma_iter *iter);
static inline bool
blk_integrity_queue_supports_integrity(struct request_queue *q)
@@ -109,12 +124,29 @@ static inline int blk_rq_map_integrity_sg(struct request *q,
{
return 0;
}
+static inline bool blk_rq_integrity_dma_unmap(struct request *req,
+ struct device *dma_dev, struct dma_iova_state *state,
+ size_t mapped_len)
+{
+ return false;
+}
static inline int blk_rq_integrity_map_user(struct request *rq,
void __user *ubuf,
ssize_t bytes)
{
return -EINVAL;
}
+static inline bool blk_rq_integrity_dma_map_iter_start(struct request *req,
+ struct device *dma_dev, struct dma_iova_state *state,
+ struct blk_dma_iter *iter)
+{
+ return false;
+}
+static inline bool blk_rq_integrity_dma_map_iter_next(struct request *req,
+ struct device *dma_dev, struct blk_dma_iter *iter)
+{
+ return false;
+}
static inline struct blk_integrity *bdev_get_integrity(struct block_device *b)
{
return NULL;
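A hedged sketch of a driver metadata path built on the new iterators, assuming they behave like the data-path blk_rq_dma_map iterators (one mapped segment per step, errors left in iter.status); program_meta_sge() is a placeholder for driver-specific SGE setup:

	#include <linux/blk-integrity.h>

	static void program_meta_sge(struct request *req, dma_addr_t addr,
				     u32 len); /* placeholder */

	static blk_status_t my_map_metadata(struct request *req,
					    struct device *dma_dev,
					    struct dma_iova_state *state,
					    size_t *mapped_len)
	{
		struct blk_dma_iter iter;

		if (!blk_rq_integrity_dma_map_iter_start(req, dma_dev, state,
							 &iter))
			return iter.status; /* nothing to map, or an error */

		do {
			/* One DMA segment per iteration: iter.addr / iter.len */
			program_meta_sge(req, iter.addr, iter.len);
			*mapped_len += iter.len;
		} while (blk_rq_integrity_dma_map_iter_next(req, dma_dev, &iter));

		return iter.status;
	}

	static void my_unmap_metadata(struct request *req, struct device *dma_dev,
				      struct dma_iova_state *state,
				      size_t mapped_len)
	{
		if (!blk_rq_integrity_dma_unmap(req, dma_dev, state, mapped_len)) {
			/* fall back to unmapping each recorded segment */
		}
	}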
diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h
index c26a01aeae00..51829958d872 100644
--- a/include/linux/blk-mq-dma.h
+++ b/include/linux/blk-mq-dma.h
@@ -5,6 +5,13 @@
#include <linux/blk-mq.h>
#include <linux/pci-p2pdma.h>
+struct blk_map_iter {
+ struct bvec_iter iter;
+ struct bio *bio;
+ struct bio_vec *bvecs;
+ bool is_integrity;
+};
+
struct blk_dma_iter {
/* Output address range for this iteration */
dma_addr_t addr;
@@ -14,7 +21,7 @@ struct blk_dma_iter {
blk_status_t status;
/* Internal to blk_rq_dma_map_iter_* */
- struct req_iterator iter;
+ struct blk_map_iter iter;
struct pci_p2pdma_map_state p2pdma;
};
@@ -36,19 +43,20 @@ static inline bool blk_rq_dma_map_coalesce(struct dma_iova_state *state)
}
/**
- * blk_rq_dma_unmap - try to DMA unmap a request
+ * blk_dma_unmap - try to DMA unmap a request
* @req: request to unmap
* @dma_dev: device to unmap from
* @state: DMA IOVA state
* @mapped_len: number of bytes to unmap
+ * @is_p2p: true if mapped with PCI_P2PDMA_MAP_BUS_ADDR
*
* Returns %false if the callers need to manually unmap every DMA segment
* mapped using @iter or %true if no work is left to be done.
*/
-static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev,
- struct dma_iova_state *state, size_t mapped_len)
+static inline bool blk_dma_unmap(struct request *req, struct device *dma_dev,
+ struct dma_iova_state *state, size_t mapped_len, bool is_p2p)
{
- if (req->cmd_flags & REQ_P2PDMA)
+ if (is_p2p)
return true;
if (dma_use_iova(state)) {
@@ -60,4 +68,11 @@ static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev,
return !dma_need_unmap(dma_dev);
}
+static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev,
+ struct dma_iova_state *state, size_t mapped_len)
+{
+ return blk_dma_unmap(req, dma_dev, state, mapped_len,
+ req->cmd_flags & REQ_P2PDMA);
+}
+
#endif /* BLK_MQ_DMA_H */
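The point of splitting blk_dma_unmap() out of blk_rq_dma_unmap() is that the P2P decision is no longer tied to the request's REQ_P2PDMA flag: the data path passes that flag, while the integrity path (blk_rq_integrity_dma_unmap() above) passes BIP_P2P_DMA from the payload. A short sketch of the data-path completion this enables, with my_* names as placeholders:

	#include <linux/blk-mq-dma.h>

	static void my_complete_data(struct request *req, struct device *dma_dev,
				     struct dma_iova_state *state,
				     size_t mapped_len)
	{
		if (blk_rq_dma_unmap(req, dma_dev, state, mapped_len))
			return;	/* bus-address P2P, IOVA, or no unmap needed */

		/* Otherwise each segment mapped at submission time must be
		 * unmapped individually, e.g. with dma_unmap_page(). */
	}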
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2a5a828f19a0..b25d12545f46 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -507,6 +507,8 @@ enum hctx_type {
* request_queue.tag_set_list.
* @srcu: Use as lock when type of the request queue is blocking
* (BLK_MQ_F_BLOCKING).
+ * @tags_srcu: SRCU used to defer freeing of tags page_list to prevent
+ * use-after-free when iterating tags.
* @update_nr_hwq_lock:
* Synchronize updating nr_hw_queues with add/del disk &
* switching elevator.
@@ -531,6 +533,7 @@ struct blk_mq_tag_set {
struct mutex tag_list_lock;
struct list_head tag_list;
struct srcu_struct *srcu;
+ struct srcu_struct tags_srcu;
struct rw_semaphore update_nr_hwq_lock;
};
@@ -767,6 +770,7 @@ struct blk_mq_tags {
* request pool
*/
spinlock_t lock;
+ struct rcu_head rcu_head;
};
static inline struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags,
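The new tags_srcu / rcu_head pair exists so tag iteration can run without racing against freeing: readers hold the SRCU read lock across the walk, and the free side defers through call_srcu(). A minimal sketch of the pattern with my_* placeholder names; the real free path also releases the page_list:

	#include <linux/blk-mq.h>
	#include <linux/slab.h>
	#include <linux/srcu.h>

	static void my_free_tags_rcu(struct rcu_head *head)
	{
		struct blk_mq_tags *tags = container_of(head, struct blk_mq_tags,
							rcu_head);

		/* All SRCU readers that could observe these tags are done. */
		kfree(tags);
	}

	static void my_defer_free_tags(struct blk_mq_tag_set *set,
				       struct blk_mq_tags *tags)
	{
		call_srcu(&set->tags_srcu, &tags->rcu_head, my_free_tags_rcu);
	}

	static void my_iter_tags(struct blk_mq_tag_set *set)
	{
		int idx = srcu_read_lock(&set->tags_srcu);

		/* ... walk tags/requests safely here ... */
		srcu_read_unlock(&set->tags_srcu, idx);
	}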
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 09b99d52fd36..8e8d1cc8b06c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -198,10 +198,6 @@ static inline bool blk_path_error(blk_status_t error)
return true;
}
-struct bio_issue {
- u64 value;
-};
-
typedef __u32 __bitwise blk_opf_t;
typedef unsigned int blk_qc_t;
@@ -242,7 +238,8 @@ struct bio {
* on release of the bio.
*/
struct blkcg_gq *bi_blkg;
- struct bio_issue bi_issue;
+ /* Time that this bio was issued. */
+ u64 issue_time_ns;
#ifdef CONFIG_BLK_CGROUP_IOCOST
u64 bi_iocost_cost;
#endif
@@ -269,18 +266,16 @@ struct bio {
struct bio_vec *bi_io_vec; /* the actual vec list */
struct bio_set *bi_pool;
-
- /*
- * We can inline a number of vecs at the end of the bio, to avoid
- * double allocations for a small number of bio_vecs. This member
- * MUST obviously be kept at the very end of the bio.
- */
- struct bio_vec bi_inline_vecs[];
};
#define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs)
#define BIO_MAX_SECTORS (UINT_MAX >> SECTOR_SHIFT)
+static inline struct bio_vec *bio_inline_vecs(struct bio *bio)
+{
+ return (struct bio_vec *)(bio + 1);
+}
+
/*
* bio flags
*/
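With struct bio_issue gone, the issue timestamp is an ordinary nanosecond value. A tiny sketch of a consumer, assuming the field is ktime-based and only populated when the queue sets QUEUE_FLAG_BIO_ISSUE_TIME (added in blkdev.h below); note it lives in the CONFIG_BLK_CGROUP section of struct bio:

	#include <linux/blk_types.h>
	#include <linux/timekeeping.h>

	static u64 my_bio_age_ns(const struct bio *bio)
	{
		/* Assumes issue_time_ns was recorded from ktime_get_ns(). */
		return ktime_get_ns() - bio->issue_time_ns;
	}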
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index fe1797bbec42..066e5309bd45 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -657,6 +657,7 @@ enum {
QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */
QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */
QUEUE_FLAG_QOS_ENABLED, /* qos is enabled */
+ QUEUE_FLAG_BIO_ISSUE_TIME, /* record bio->issue_time_ns */
QUEUE_FLAG_MAX
};
@@ -999,6 +1000,8 @@ extern int blk_register_queue(struct gendisk *disk);
extern void blk_unregister_queue(struct gendisk *disk);
void submit_bio_noacct(struct bio *bio);
struct bio *bio_split_to_limits(struct bio *bio);
+struct bio *bio_submit_split_bioset(struct bio *bio, unsigned int split_sectors,
+ struct bio_set *bs);
extern int blk_lld_busy(struct request_queue *q);
extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags);
@@ -1590,13 +1593,6 @@ static inline unsigned int bdev_dma_alignment(struct block_device *bdev)
return queue_dma_alignment(bdev_get_queue(bdev));
}
-static inline bool bdev_iter_is_aligned(struct block_device *bdev,
- struct iov_iter *iter)
-{
- return iov_iter_is_aligned(iter, bdev_dma_alignment(bdev),
- bdev_logical_block_size(bdev) - 1);
-}
-
static inline unsigned int
blk_lim_dma_alignment_and_pad(struct queue_limits *lim)
{
@@ -1660,7 +1656,7 @@ struct block_device_operations {
unsigned int (*check_events) (struct gendisk *disk,
unsigned int clearing);
void (*unlock_native_capacity) (struct gendisk *);
- int (*getgeo)(struct block_device *, struct hd_geometry *);
+ int (*getgeo)(struct gendisk *, struct hd_geometry *);
int (*set_read_only)(struct block_device *bdev, bool ro);
void (*free_disk)(struct gendisk *disk);
/* this callback is with swap_lock and sometimes page table lock held */
@@ -1870,6 +1866,20 @@ bdev_atomic_write_unit_max_bytes(struct block_device *bdev)
return queue_atomic_write_unit_max_bytes(bdev_get_queue(bdev));
}
+static inline int bio_split_rw_at(struct bio *bio,
+ const struct queue_limits *lim,
+ unsigned *segs, unsigned max_bytes)
+{
+ return bio_split_io_at(bio, lim, segs, max_bytes, lim->dma_alignment);
+}
+
+static inline int bio_iov_iter_get_bdev_pages(struct bio *bio,
+ struct iov_iter *iter, struct block_device *bdev)
+{
+ return bio_iov_iter_get_pages_aligned(bio, iter,
+ bdev_logical_block_size(bdev) - 1);
+}
+
#define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { }
#endif /* _LINUX_BLKDEV_H */
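A brief sketch of the intended call site for bio_iov_iter_get_bdev_pages(): a block-device direct-I/O path that used to pre-validate every iovec with the removed bdev_iter_is_aligned() now hands the logical-block-size mask straight to the page-pinning step; my_dio_fill_bio() is a placeholder:

	#include <linux/bio.h>
	#include <linux/blkdev.h>

	static int my_dio_fill_bio(struct bio *bio, struct iov_iter *iter,
				   struct block_device *bdev)
	{
		/* Fails only if the request as a whole cannot meet the
		 * logical block size; individual segments no longer need
		 * to be aligned. */
		return bio_iov_iter_get_bdev_pages(bio, iter, bdev);
	}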
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 0620dd67369f..21de0935775d 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1203,7 +1203,7 @@ extern void ata_qc_complete(struct ata_queued_cmd *qc);
extern u64 ata_qc_get_active(struct ata_port *ap);
extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd);
extern int ata_std_bios_param(struct scsi_device *sdev,
- struct block_device *bdev,
+ struct gendisk *unused,
sector_t capacity, int geom[]);
extern void ata_scsi_unlock_native_capacity(struct scsi_device *sdev);
extern int ata_scsi_sdev_init(struct scsi_device *sdev);
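->bios_param() moves to struct gendisk in the same series as ->getgeo(); libata's helper simply ignores the disk. A hedged sketch of a SCSI host driver forwarding to it under the new signature (myhost_* is a placeholder):

	#include <linux/libata.h>

	static int myhost_bios_param(struct scsi_device *sdev,
				     struct gendisk *disk,
				     sector_t capacity, int geom[])
	{
		/* geom[] is filled as { heads, sectors, cylinders }. */
		return ata_std_bios_param(sdev, disk, capacity, geom);
	}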
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 2e86c653186c..5b127043a151 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -286,8 +286,6 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
#endif
size_t iov_iter_zero(size_t bytes, struct iov_iter *);
-bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask,
- unsigned len_mask);
unsigned long iov_iter_alignment(const struct iov_iter *i);
unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov,