author		Linus Torvalds <torvalds@linux-foundation.org>	2026-02-17 08:48:45 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2026-02-17 08:48:45 -0800
commit		99dfe2d4da67d863ff8f185d1e8033cce28e4c49 (patch)
tree		a5afa6d4923bf972103f10c176d2c65d0f1ba9a1
parent		7b751b01ade7f666de2f5c365bd9562c2dcd7d60 (diff)
parent		dfe48ea179733be948c432f6af2fc3913cf5dd28 (diff)
Merge tag 'block-7.0-20260216' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull more block updates from Jens Axboe:

 - Fix partial IOVA mapping cleanup in error handling

 - Minor prep series ignoring discard return value, as the inline value
   is always known

 - Ensure BLK_FEAT_STABLE_WRITES is set for drbd

 - Fix leak of folio in bio_iov_iter_bounce_read()

 - Allow IOC_PR_READ_* for read-only open

 - Another debugfs deadlock fix

 - A few doc updates

* tag 'block-7.0-20260216' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  blk-mq: use NOIO context to prevent deadlock during debugfs creation
  blk-stat: convert struct blk_stat_callback to kernel-doc
  block: fix enum descriptions kernel-doc
  block: update docs for bio and bvec_iter
  block: change return type to void
  nvmet: ignore discard return value
  md: ignore discard return value
  block: fix partial IOVA mapping cleanup in blk_rq_dma_map_iova
  block: fix folio leak in bio_iov_iter_bounce_read()
  block: allow IOC_PR_READ_* ioctls with BLK_OPEN_READ
  drbd: always set BLK_FEAT_STABLE_WRITES
-rw-r--r--  block/bio.c                          4
-rw-r--r--  block/blk-lib.c                      3
-rw-r--r--  block/blk-mq-debugfs.c              10
-rw-r--r--  block/blk-mq-dma.c                  13
-rw-r--r--  block/blk-mq-sched.c                 9
-rw-r--r--  block/blk-stat.h                     9
-rw-r--r--  block/blk-sysfs.c                    9
-rw-r--r--  block/blk-wbt.c                     10
-rw-r--r--  block/blk.h                         31
-rw-r--r--  block/ioctl.c                       34
-rw-r--r--  drivers/block/drbd/drbd_main.c       3
-rw-r--r--  drivers/block/drbd/drbd_nl.c        20
-rw-r--r--  drivers/md/md.c                      4
-rw-r--r--  drivers/nvme/target/io-cmd-bdev.c   28
-rw-r--r--  include/linux/blk_types.h           33
-rw-r--r--  include/linux/blkdev.h               2
-rw-r--r--  include/linux/bvec.h                29
-rw-r--r--  kernel/trace/blktrace.c             38
18 files changed, 183 insertions, 106 deletions
diff --git a/block/bio.c b/block/bio.c
index b291b9aaeee1..8203bb7455a9 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1382,8 +1382,10 @@ static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter)
ret = iov_iter_extract_bvecs(iter, bio->bi_io_vec + 1, len,
&bio->bi_vcnt, bio->bi_max_vecs - 1, 0);
if (ret <= 0) {
- if (!bio->bi_vcnt)
+ if (!bio->bi_vcnt) {
+ folio_put(folio);
return ret;
+ }
break;
}
len -= ret;
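The fix drops the bounce folio's reference on the early return that fires before any bvecs were extracted (bi_vcnt still zero); once something has been accumulated, the loop breaks instead and later teardown owns the folio. A rough userspace model of that ownership rule, assuming toy obj_get()/obj_put() helpers rather than the kernel's folio API:

	/*
	 * Toy model of the ownership rule in bio_iov_iter_bounce_read():
	 * the reference taken for the bounce object must be dropped on
	 * the nothing-accumulated early return. obj_get()/obj_put() and
	 * do_bounce_setup() are hypothetical, not kernel API.
	 */
	#include <stdio.h>
	#include <stdlib.h>

	struct obj { int refs; };

	static struct obj *obj_get(void)
	{
		struct obj *o = calloc(1, sizeof(*o));
		if (o)
			o->refs = 1;
		return o;
	}

	static void obj_put(struct obj *o)
	{
		if (o && --o->refs == 0)
			free(o);
	}

	static int do_bounce_setup(int fail_first)
	{
		struct obj *bounce = obj_get();
		int vcnt = 0;	/* plays the role of bio->bi_vcnt */

		if (!bounce)
			return -1;

		for (int i = 0; i < 4; i++) {
			int ret = fail_first ? -1 : 1;	/* stand-in for extraction */
			if (ret <= 0) {
				if (!vcnt) {
					obj_put(bounce);	/* the fix: drop the ref */
					return ret;
				}
				break;	/* partial progress: completion owns it */
			}
			vcnt += ret;
		}
		obj_put(bounce);	/* normal completion path */
		return 0;
	}

	int main(void)
	{
		printf("fail-first: %d, success: %d\n",
		       do_bounce_setup(1), do_bounce_setup(0));
		return 0;
	}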
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 0be3acdc3eb5..3213afc7f0d5 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -60,7 +60,7 @@ struct bio *blk_alloc_discard_bio(struct block_device *bdev,
return bio;
}
-int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+void __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
{
struct bio *bio;
@@ -68,7 +68,6 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
while ((bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects,
gfp_mask)))
*biop = bio_chain_and_submit(*biop, bio);
- return 0;
}
EXPORT_SYMBOL(__blkdev_issue_discard);
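With the trailing `return 0` gone, __blkdev_issue_discard() unconditionally succeeds and reports progress only through the *biop out-parameter; the md and nvmet hunks below update the callers accordingly. A compilable sketch of that void-return, out-parameter chaining style, with alloc_piece() and chain() as hypothetical stand-ins for blk_alloc_discard_bio() and bio_chain_and_submit():

	/*
	 * Sketch of the void-return/out-parameter style: progress is
	 * reported only through *head, never a return code.
	 */
	#include <stdio.h>
	#include <stdlib.h>

	struct piece { struct piece *next; };

	static struct piece *alloc_piece(unsigned long *remaining)
	{
		if (!*remaining)
			return NULL;	/* range exhausted: loop terminates */
		(*remaining)--;
		return calloc(1, sizeof(struct piece));
	}

	static struct piece *chain(struct piece *prev, struct piece *p)
	{
		p->next = prev;
		return p;
	}

	static void issue_all(unsigned long remaining, struct piece **head)
	{
		struct piece *p;

		while ((p = alloc_piece(&remaining)))
			*head = chain(*head, p);
	}

	int main(void)
	{
		struct piece *head = NULL, *n;

		issue_all(3, &head);
		if (!head) {	/* mirrors the !discard_bio check in md.c */
			puts("nothing issued");
			return 0;
		}
		while (head) {
			n = head->next;
			free(head);
			head = n;
		}
		puts("chained pieces submitted");
		return 0;
	}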
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index faeaa1fc86a7..28167c9baa55 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -614,11 +614,6 @@ static void debugfs_create_files(struct request_queue *q, struct dentry *parent,
{
lockdep_assert_held(&q->debugfs_mutex);
/*
- * Creating new debugfs entries with queue freezed has the risk of
- * deadlock.
- */
- WARN_ON_ONCE(q->mq_freeze_depth != 0);
- /*
* debugfs_mutex should not be nested under other locks that can be
* grabbed while queue is frozen.
*/
@@ -693,12 +688,13 @@ void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx)
void blk_mq_debugfs_register_hctxs(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
+ unsigned int memflags;
unsigned long i;
- mutex_lock(&q->debugfs_mutex);
+ memflags = blk_debugfs_lock(q);
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_debugfs_register_hctx(q, hctx);
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock(q, memflags);
}
void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
diff --git a/block/blk-mq-dma.c b/block/blk-mq-dma.c
index 3c87779cdc19..bfdb9ed70741 100644
--- a/block/blk-mq-dma.c
+++ b/block/blk-mq-dma.c
@@ -121,17 +121,20 @@ static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
vec->len, dir, attrs);
if (error)
- break;
+ goto out_unlink;
mapped += vec->len;
} while (blk_map_iter_next(req, &iter->iter, vec));
error = dma_iova_sync(dma_dev, state, 0, mapped);
- if (error) {
- iter->status = errno_to_blk_status(error);
- return false;
- }
+ if (error)
+ goto out_unlink;
return true;
+
+out_unlink:
+ dma_iova_destroy(dma_dev, state, mapped, dir, attrs);
+ iter->status = errno_to_blk_status(error);
+ return false;
}
static inline void blk_rq_map_iter_init(struct request *rq,
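Previously a dma_iova_link() failure broke out of the loop and a failed dma_iova_sync() returned directly, leaving already-linked ranges mapped; the shared out_unlink label now destroys exactly the `mapped` bytes accumulated so far. A generic model of that single-exit unwind, with link_one() and destroy_upto() as illustrative stand-ins for dma_iova_link() and dma_iova_destroy():

	#include <stdio.h>

	#define NCHUNKS 4

	static int link_one(int i)
	{
		return (i == 2) ? -1 : 0;	/* simulate failure on chunk 3 */
	}

	static void destroy_upto(size_t mapped)
	{
		printf("unwinding %zu bytes of partial mapping\n", mapped);
	}

	static int map_all(void)
	{
		size_t mapped = 0;
		int err;

		for (int i = 0; i < NCHUNKS; i++) {
			err = link_one(i);
			if (err)
				goto out_unlink;	/* single exit tears down */
			mapped += 512;
		}
		return 0;

	out_unlink:
		/* pass the running total so only the linked range is destroyed */
		destroy_upto(mapped);
		return err;
	}

	int main(void)
	{
		return map_all() ? 1 : 0;
	}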
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index e26898128a7e..97c3c8f45a9b 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -390,13 +390,14 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int fla
void blk_mq_sched_reg_debugfs(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
+ unsigned int memflags;
unsigned long i;
- mutex_lock(&q->debugfs_mutex);
+ memflags = blk_debugfs_lock(q);
blk_mq_debugfs_register_sched(q);
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_debugfs_register_sched_hctx(q, hctx);
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock(q, memflags);
}
void blk_mq_sched_unreg_debugfs(struct request_queue *q)
@@ -404,11 +405,11 @@ void blk_mq_sched_unreg_debugfs(struct request_queue *q)
struct blk_mq_hw_ctx *hctx;
unsigned long i;
- mutex_lock(&q->debugfs_mutex);
+ blk_debugfs_lock_nomemsave(q);
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_debugfs_unregister_sched_hctx(hctx);
blk_mq_debugfs_unregister_sched(q);
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock_nomemrestore(q);
}
void blk_mq_free_sched_tags(struct elevator_tags *et,
diff --git a/block/blk-stat.h b/block/blk-stat.h
index 9e05bf18d1be..cc5b66e7ee60 100644
--- a/block/blk-stat.h
+++ b/block/blk-stat.h
@@ -17,7 +17,7 @@
* timer fires, @cpu_stat is flushed to @stat and @timer_fn is invoked.
*/
struct blk_stat_callback {
- /*
+ /**
* @list: RCU list of callbacks for a &struct request_queue.
*/
struct list_head list;
@@ -50,7 +50,7 @@ struct blk_stat_callback {
struct blk_rq_stat *stat;
/**
- * @fn: Callback function.
+ * @timer_fn: Callback function.
*/
void (*timer_fn)(struct blk_stat_callback *);
@@ -59,6 +59,9 @@ struct blk_stat_callback {
*/
void *data;
+ /**
+ * @rcu: rcu list head
+ */
struct rcu_head rcu;
};
@@ -126,6 +129,8 @@ void blk_stat_free_callback(struct blk_stat_callback *cb);
* blk_stat_is_active() - Check if a block statistics callback is currently
* gathering statistics.
* @cb: The callback.
+ *
+ * Returns: %true iff the callback is active.
*/
static inline bool blk_stat_is_active(struct blk_stat_callback *cb)
{
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 003aa684e854..f3b1968c80ce 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -892,13 +892,13 @@ static void blk_debugfs_remove(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
- mutex_lock(&q->debugfs_mutex);
+ blk_debugfs_lock_nomemsave(q);
blk_trace_shutdown(q);
debugfs_remove_recursive(q->debugfs_dir);
q->debugfs_dir = NULL;
q->sched_debugfs_dir = NULL;
q->rqos_debugfs_dir = NULL;
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock_nomemrestore(q);
}
/**
@@ -908,6 +908,7 @@ static void blk_debugfs_remove(struct gendisk *disk)
int blk_register_queue(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
+ unsigned int memflags;
int ret;
ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue");
@@ -921,11 +922,11 @@ int blk_register_queue(struct gendisk *disk)
}
mutex_lock(&q->sysfs_lock);
- mutex_lock(&q->debugfs_mutex);
+ memflags = blk_debugfs_lock(q);
q->debugfs_dir = debugfs_create_dir(disk->disk_name, blk_debugfs_root);
if (queue_is_mq(q))
blk_mq_debugfs_register(q);
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock(q, memflags);
ret = disk_register_independent_access_ranges(disk);
if (ret)
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 1415f2bf8611..6dba71e87387 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -776,6 +776,7 @@ void wbt_init_enable_default(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
struct rq_wb *rwb;
+ unsigned int memflags;
if (!__wbt_enable_default(disk))
return;
@@ -789,9 +790,9 @@ void wbt_init_enable_default(struct gendisk *disk)
return;
}
- mutex_lock(&q->debugfs_mutex);
+ memflags = blk_debugfs_lock(q);
blk_mq_debugfs_register_rq_qos(q);
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock(q, memflags);
}
static u64 wbt_default_latency_nsec(struct request_queue *q)
@@ -1015,9 +1016,10 @@ int wbt_set_lat(struct gendisk *disk, s64 val)
blk_mq_unquiesce_queue(q);
out:
blk_mq_unfreeze_queue(q, memflags);
- mutex_lock(&q->debugfs_mutex);
+
+ memflags = blk_debugfs_lock(q);
blk_mq_debugfs_register_rq_qos(q);
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock(q, memflags);
return ret;
}
diff --git a/block/blk.h b/block/blk.h
index a6b1de509733..f6053e9dd2aa 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -729,4 +729,35 @@ static inline void blk_unfreeze_release_lock(struct request_queue *q)
}
#endif
+/*
+ * debugfs directory and file creation can trigger fs reclaim, which can enter
+ * back into the block layer request_queue. This can cause deadlock if the
+ * queue is frozen. Use NOIO context together with debugfs_mutex to prevent fs
+ * reclaim from triggering block I/O.
+ */
+static inline void blk_debugfs_lock_nomemsave(struct request_queue *q)
+{
+ mutex_lock(&q->debugfs_mutex);
+}
+
+static inline void blk_debugfs_unlock_nomemrestore(struct request_queue *q)
+{
+ mutex_unlock(&q->debugfs_mutex);
+}
+
+static inline unsigned int __must_check blk_debugfs_lock(struct request_queue *q)
+{
+ unsigned int memflags = memalloc_noio_save();
+
+ blk_debugfs_lock_nomemsave(q);
+ return memflags;
+}
+
+static inline void blk_debugfs_unlock(struct request_queue *q,
+ unsigned int memflags)
+{
+ blk_debugfs_unlock_nomemrestore(q);
+ memalloc_noio_restore(memflags);
+}
+
#endif /* BLK_INTERNAL_H */
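These helpers bracket debugfs work in a NOIO allocation scope, so anything debugfs allocates under debugfs_mutex cannot re-enter the block layer through fs reclaim while a queue is frozen. The save/restore shape (as opposed to set/clear) is what lets such sections nest safely; a toy model of that scoping, with a plain global standing in for the per-task flags word the real memalloc_noio_save()/memalloc_noio_restore() pair operates on:

	#include <stdio.h>

	#define FLAG_NOIO 0x1u

	static unsigned int alloc_flags;	/* per-task in the kernel */

	static unsigned int noio_save(void)
	{
		unsigned int old = alloc_flags;

		alloc_flags |= FLAG_NOIO;
		return old;
	}

	static void noio_restore(unsigned int old)
	{
		alloc_flags = old;	/* an outer NOIO section stays in force */
	}

	int main(void)
	{
		unsigned int outer = noio_save();
		{
			unsigned int inner = noio_save();	/* nested section */
			printf("nested:       NOIO=%u\n", alloc_flags & FLAG_NOIO);
			noio_restore(inner);
		}
		printf("inner undone: NOIO=%u\n", alloc_flags & FLAG_NOIO);
		noio_restore(outer);
		printf("outer undone: NOIO=%u\n", alloc_flags & FLAG_NOIO);
		return 0;
	}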
diff --git a/block/ioctl.c b/block/ioctl.c
index fd48f82f9f03..0b04661ac809 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -318,7 +318,13 @@ int blkdev_compat_ptr_ioctl(struct block_device *bdev, blk_mode_t mode,
EXPORT_SYMBOL(blkdev_compat_ptr_ioctl);
#endif
-static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode)
+enum pr_direction {
+ PR_IN, /* read from device */
+ PR_OUT, /* write to device */
+};
+
+static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode,
+ enum pr_direction dir)
{
/* no sense to make reservations for partitions */
if (bdev_is_partition(bdev))
@@ -326,11 +332,17 @@ static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode)
if (capable(CAP_SYS_ADMIN))
return true;
+
/*
- * Only allow unprivileged reservations if the file descriptor is open
- * for writing.
+ * Only allow unprivileged reservation _out_ commands if the file
+ * descriptor is open for writing. Allow reservation _in_ commands if
+ * the file descriptor is open for reading since they do not modify the
+ * device.
*/
- return mode & BLK_OPEN_WRITE;
+ if (dir == PR_IN)
+ return mode & BLK_OPEN_READ;
+ else
+ return mode & BLK_OPEN_WRITE;
}
static int blkdev_pr_register(struct block_device *bdev, blk_mode_t mode,
@@ -339,7 +351,7 @@ static int blkdev_pr_register(struct block_device *bdev, blk_mode_t mode,
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_registration reg;
- if (!blkdev_pr_allowed(bdev, mode))
+ if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
return -EPERM;
if (!ops || !ops->pr_register)
return -EOPNOTSUPP;
@@ -357,7 +369,7 @@ static int blkdev_pr_reserve(struct block_device *bdev, blk_mode_t mode,
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_reservation rsv;
- if (!blkdev_pr_allowed(bdev, mode))
+ if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
return -EPERM;
if (!ops || !ops->pr_reserve)
return -EOPNOTSUPP;
@@ -375,7 +387,7 @@ static int blkdev_pr_release(struct block_device *bdev, blk_mode_t mode,
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_reservation rsv;
- if (!blkdev_pr_allowed(bdev, mode))
+ if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
return -EPERM;
if (!ops || !ops->pr_release)
return -EOPNOTSUPP;
@@ -393,7 +405,7 @@ static int blkdev_pr_preempt(struct block_device *bdev, blk_mode_t mode,
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_preempt p;
- if (!blkdev_pr_allowed(bdev, mode))
+ if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
return -EPERM;
if (!ops || !ops->pr_preempt)
return -EOPNOTSUPP;
@@ -411,7 +423,7 @@ static int blkdev_pr_clear(struct block_device *bdev, blk_mode_t mode,
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_clear c;
- if (!blkdev_pr_allowed(bdev, mode))
+ if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
return -EPERM;
if (!ops || !ops->pr_clear)
return -EOPNOTSUPP;
@@ -434,7 +446,7 @@ static int blkdev_pr_read_keys(struct block_device *bdev, blk_mode_t mode,
size_t keys_copy_len;
int ret;
- if (!blkdev_pr_allowed(bdev, mode))
+ if (!blkdev_pr_allowed(bdev, mode, PR_IN))
return -EPERM;
if (!ops || !ops->pr_read_keys)
return -EOPNOTSUPP;
@@ -486,7 +498,7 @@ static int blkdev_pr_read_reservation(struct block_device *bdev,
struct pr_read_reservation out = {};
int ret;
- if (!blkdev_pr_allowed(bdev, mode))
+ if (!blkdev_pr_allowed(bdev, mode, PR_IN))
return -EPERM;
if (!ops || !ops->pr_read_reservation)
return -EOPNOTSUPP;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index c73376886e7a..1f6ac9202b66 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2659,9 +2659,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
* connect.
*/
.max_hw_sectors = DRBD_MAX_BIO_SIZE_SAFE >> 8,
- .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA |
- BLK_FEAT_ROTATIONAL |
- BLK_FEAT_STABLE_WRITES,
};
device = minor_to_device(minor);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 91f3b8afb63c..b502038be0a9 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1296,6 +1296,8 @@ void drbd_reconsider_queue_parameters(struct drbd_device *device,
lim.max_segments = drbd_backing_dev_max_segments(device);
} else {
lim.max_segments = BLK_MAX_SEGMENTS;
+ lim.features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA |
+ BLK_FEAT_ROTATIONAL | BLK_FEAT_STABLE_WRITES;
}
lim.max_hw_sectors = new >> SECTOR_SHIFT;
@@ -1318,8 +1320,24 @@ void drbd_reconsider_queue_parameters(struct drbd_device *device,
lim.max_hw_discard_sectors = 0;
}
- if (bdev)
+ if (bdev) {
blk_stack_limits(&lim, &b->limits, 0);
+ /*
+ * blk_set_stacking_limits() cleared the features, and
+ * blk_stack_limits() may or may not have inherited
+ * BLK_FEAT_STABLE_WRITES from the backing device.
+ *
+ * DRBD always requires stable writes because:
+ * 1. The same bio data is read for both local disk I/O and
+ * network transmission. If the page changes mid-flight,
+ * the local and remote copies could diverge.
+ * 2. When data integrity is enabled, DRBD calculates a
+ * checksum before sending the data. If the page changes
+ * between checksum calculation and transmission, the
+ * receiver will detect a checksum mismatch.
+ */
+ lim.features |= BLK_FEAT_STABLE_WRITES;
+ }
/*
* If we can handle "zeroes" efficiently on the protocol, we want to do
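The second point in the comment above is easy to demonstrate: once a checksum has been computed over a page, any write to that page before the data leaves the machine guarantees a mismatch at the receiver. A toy illustration (the XOR checksum is purely for show, not DRBD's algorithm):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	static uint8_t checksum(const uint8_t *buf, size_t len)
	{
		uint8_t c = 0;

		for (size_t i = 0; i < len; i++)
			c ^= buf[i];
		return c;
	}

	int main(void)
	{
		uint8_t page[16];

		memset(page, 0xab, sizeof(page));
		uint8_t sent_csum = checksum(page, sizeof(page));

		/* Without stable writes, the page owner may redirty it
		 * here, after the checksum was computed but before the
		 * data goes out on the wire. */
		page[0] = 0xcd;

		uint8_t recv_csum = checksum(page, sizeof(page));
		printf("checksum match: %s\n",
		       sent_csum == recv_csum ? "yes" : "no (integrity error)");
		return 0;
	}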
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9ca5d74fd2b2..72a1c7267851 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -9179,8 +9179,8 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
{
struct bio *discard_bio = NULL;
- if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO,
- &discard_bio) || !discard_bio)
+ __blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO, &discard_bio);
+ if (!discard_bio)
return;
bio_chain(discard_bio, bio);
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 0103815542d4..f15d1c213bc6 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -363,29 +363,14 @@ u16 nvmet_bdev_flush(struct nvmet_req *req)
return 0;
}
-static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
- struct nvme_dsm_range *range, struct bio **bio)
-{
- struct nvmet_ns *ns = req->ns;
- int ret;
-
- ret = __blkdev_issue_discard(ns->bdev,
- nvmet_lba_to_sect(ns, range->slba),
- le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
- GFP_KERNEL, bio);
- if (ret && ret != -EOPNOTSUPP) {
- req->error_slba = le64_to_cpu(range->slba);
- return errno_to_nvme_status(req, ret);
- }
- return NVME_SC_SUCCESS;
-}
-
static void nvmet_bdev_execute_discard(struct nvmet_req *req)
{
+ struct nvmet_ns *ns = req->ns;
struct nvme_dsm_range range;
struct bio *bio = NULL;
+ sector_t nr_sects;
int i;
- u16 status;
+ u16 status = NVME_SC_SUCCESS;
for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
@@ -393,9 +378,10 @@ static void nvmet_bdev_execute_discard(struct nvmet_req *req)
if (status)
break;
- status = nvmet_bdev_discard_range(req, &range, &bio);
- if (status)
- break;
+ nr_sects = le32_to_cpu(range.nlb) << (ns->blksize_shift - 9);
+ __blkdev_issue_discard(ns->bdev,
+ nvmet_lba_to_sect(ns, range.slba), nr_sects,
+ GFP_KERNEL, &bio);
}
if (bio) {
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d59553324a84..8808ee76e73c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -273,7 +273,13 @@ struct bio {
* Everything starting with bi_max_vecs will be preserved by bio_reset()
*/
- unsigned short bi_max_vecs; /* max bvl_vecs we can hold */
+ /*
+ * Number of elements in `bi_io_vec` that were allocated for this bio.
+ * Only used by the bio submitter to make `bio_add_page` fail once full
+ * and to free the `bi_io_vec` allocation. Must not be used in drivers
+ * and does not hold a useful value for cloned bios.
+ */
+ unsigned short bi_max_vecs;
atomic_t __bi_cnt; /* pin count */
@@ -339,32 +345,33 @@ typedef __u32 __bitwise blk_mq_req_flags_t;
* meaning.
*/
enum req_op {
- /* read sectors from the device */
+ /** @REQ_OP_READ: read sectors from the device */
REQ_OP_READ = (__force blk_opf_t)0,
- /* write sectors to the device */
+ /** @REQ_OP_WRITE: write sectors to the device */
REQ_OP_WRITE = (__force blk_opf_t)1,
- /* flush the volatile write cache */
+ /** @REQ_OP_FLUSH: flush the volatile write cache */
REQ_OP_FLUSH = (__force blk_opf_t)2,
- /* discard sectors */
+ /** @REQ_OP_DISCARD: discard sectors */
REQ_OP_DISCARD = (__force blk_opf_t)3,
- /* securely erase sectors */
+ /** @REQ_OP_SECURE_ERASE: securely erase sectors */
REQ_OP_SECURE_ERASE = (__force blk_opf_t)5,
- /* write data at the current zone write pointer */
+ /** @REQ_OP_ZONE_APPEND: write data at the current zone write pointer */
REQ_OP_ZONE_APPEND = (__force blk_opf_t)7,
- /* write the zero filled sector many times */
+ /** @REQ_OP_WRITE_ZEROES: write the zero filled sector many times */
REQ_OP_WRITE_ZEROES = (__force blk_opf_t)9,
- /* Open a zone */
+ /** @REQ_OP_ZONE_OPEN: Open a zone */
REQ_OP_ZONE_OPEN = (__force blk_opf_t)11,
- /* Close a zone */
+ /** @REQ_OP_ZONE_CLOSE: Close a zone */
REQ_OP_ZONE_CLOSE = (__force blk_opf_t)13,
- /* Transition a zone to full */
+ /** @REQ_OP_ZONE_FINISH: Transition a zone to full */
REQ_OP_ZONE_FINISH = (__force blk_opf_t)15,
- /* reset a zone write pointer */
+ /** @REQ_OP_ZONE_RESET: reset a zone write pointer */
REQ_OP_ZONE_RESET = (__force blk_opf_t)17,
- /* reset all the zone present on the device */
+ /** @REQ_OP_ZONE_RESET_ALL: reset all the zone present on the device */
REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)19,
/* Driver private requests */
+ /* private: */
REQ_OP_DRV_IN = (__force blk_opf_t)34,
REQ_OP_DRV_OUT = (__force blk_opf_t)35,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 99ef8cd7673c..d463b9b5a0a5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1259,7 +1259,7 @@ extern void blk_io_schedule(void);
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask);
-int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+void __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop);
int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp);
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 3fc0efa0825b..06fb60471aaf 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -75,14 +75,27 @@ static inline void bvec_set_virt(struct bio_vec *bv, void *vaddr,
}
struct bvec_iter {
- sector_t bi_sector; /* device address in 512 byte
- sectors */
- unsigned int bi_size; /* residual I/O count */
-
- unsigned int bi_idx; /* current index into bvl_vec */
-
- unsigned int bi_bvec_done; /* number of bytes completed in
- current bvec */
+ /*
+ * Current device address in 512 byte sectors. Only updated by the bio
+ * iter wrappers and not the bvec iterator helpers themselves.
+ */
+ sector_t bi_sector;
+
+ /*
+ * Remaining size in bytes.
+ */
+ unsigned int bi_size;
+
+ /*
+ * Current index into the bvec array. This indexes into `bi_io_vec` when
+ * iterating a bvec array that is part of a `bio`.
+ */
+ unsigned int bi_idx;
+
+ /*
+ * Current offset in the bvec entry pointed to by `bi_idx`.
+ */
+ unsigned int bi_bvec_done;
} __packed __aligned(4);
struct bvec_iter_all {
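The newly documented fields form a cursor over a segment array: bi_idx names the current entry, bi_bvec_done counts the bytes already consumed within it, and bi_size counts down across the whole walk. A small self-contained model of that cursor, with struct seg and advance() as illustrative stand-ins rather than kernel API:

	#include <stdio.h>

	struct seg { const char *data; unsigned int len; };

	struct iter {
		unsigned int size;	/* bi_size: bytes remaining overall */
		unsigned int idx;	/* bi_idx: current segment */
		unsigned int done;	/* bi_bvec_done: bytes used in segs[idx] */
	};

	static void advance(struct iter *it, const struct seg *segs,
			    unsigned int bytes)
	{
		it->size -= bytes;
		it->done += bytes;
		/* Stepping to the next segment once the current one is
		 * consumed is what bvec iteration does with bi_idx. */
		while (it->done && it->done >= segs[it->idx].len) {
			it->done -= segs[it->idx].len;
			it->idx++;
		}
	}

	int main(void)
	{
		struct seg segs[] = { { "abcd", 4 }, { "efg", 3 } };
		struct iter it = { .size = 7 };

		while (it.size) {
			printf("%c", segs[it.idx].data[it.done]);
			advance(&it, segs, 1);
		}
		printf("\n");	/* prints "abcdefg" */
		return 0;
	}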
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index f2de9cf15d0e..e6988929ead2 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -559,9 +559,9 @@ int blk_trace_remove(struct request_queue *q)
{
int ret;
- mutex_lock(&q->debugfs_mutex);
+ blk_debugfs_lock_nomemsave(q);
ret = __blk_trace_remove(q);
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock_nomemrestore(q);
return ret;
}
@@ -767,6 +767,7 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
struct blk_user_trace_setup2 buts2;
struct blk_user_trace_setup buts;
struct blk_trace *bt;
+ unsigned int memflags;
int ret;
ret = copy_from_user(&buts, arg, sizeof(buts));
@@ -785,16 +786,16 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
.pid = buts.pid,
};
- mutex_lock(&q->debugfs_mutex);
+ memflags = blk_debugfs_lock(q);
bt = blk_trace_setup_prepare(q, name, dev, buts.buf_size, buts.buf_nr,
bdev);
if (IS_ERR(bt)) {
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock(q, memflags);
return PTR_ERR(bt);
}
blk_trace_setup_finalize(q, name, 1, bt, &buts2);
strscpy(buts.name, buts2.name, BLKTRACE_BDEV_SIZE);
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock(q, memflags);
if (copy_to_user(arg, &buts, sizeof(buts))) {
blk_trace_remove(q);
@@ -809,6 +810,7 @@ static int blk_trace_setup2(struct request_queue *q, char *name, dev_t dev,
{
struct blk_user_trace_setup2 buts2;
struct blk_trace *bt;
+ unsigned int memflags;
if (copy_from_user(&buts2, arg, sizeof(buts2)))
return -EFAULT;
@@ -819,15 +821,15 @@ static int blk_trace_setup2(struct request_queue *q, char *name, dev_t dev,
if (buts2.flags != 0)
return -EINVAL;
- mutex_lock(&q->debugfs_mutex);
+ memflags = blk_debugfs_lock(q);
bt = blk_trace_setup_prepare(q, name, dev, buts2.buf_size, buts2.buf_nr,
bdev);
if (IS_ERR(bt)) {
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock(q, memflags);
return PTR_ERR(bt);
}
blk_trace_setup_finalize(q, name, 2, bt, &buts2);
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock(q, memflags);
if (copy_to_user(arg, &buts2, sizeof(buts2))) {
blk_trace_remove(q);
@@ -844,6 +846,7 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
struct blk_user_trace_setup2 buts2;
struct compat_blk_user_trace_setup cbuts;
struct blk_trace *bt;
+ unsigned int memflags;
if (copy_from_user(&cbuts, arg, sizeof(cbuts)))
return -EFAULT;
@@ -860,15 +863,15 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
.pid = cbuts.pid,
};
- mutex_lock(&q->debugfs_mutex);
+ memflags = blk_debugfs_lock(q);
bt = blk_trace_setup_prepare(q, name, dev, buts2.buf_size, buts2.buf_nr,
bdev);
if (IS_ERR(bt)) {
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock(q, memflags);
return PTR_ERR(bt);
}
blk_trace_setup_finalize(q, name, 1, bt, &buts2);
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock(q, memflags);
if (copy_to_user(arg, &buts2.name, ARRAY_SIZE(buts2.name))) {
blk_trace_remove(q);
@@ -898,9 +901,9 @@ int blk_trace_startstop(struct request_queue *q, int start)
{
int ret;
- mutex_lock(&q->debugfs_mutex);
+ blk_debugfs_lock_nomemsave(q);
ret = __blk_trace_startstop(q, start);
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock_nomemrestore(q);
return ret;
}
@@ -2041,7 +2044,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
struct blk_trace *bt;
ssize_t ret = -ENXIO;
- mutex_lock(&q->debugfs_mutex);
+ blk_debugfs_lock_nomemsave(q);
bt = rcu_dereference_protected(q->blk_trace,
lockdep_is_held(&q->debugfs_mutex));
@@ -2062,7 +2065,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
ret = sprintf(buf, "%llu\n", bt->end_lba);
out_unlock_bdev:
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock_nomemrestore(q);
return ret;
}
@@ -2073,6 +2076,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
struct block_device *bdev = dev_to_bdev(dev);
struct request_queue *q = bdev_get_queue(bdev);
struct blk_trace *bt;
+ unsigned int memflags;
u64 value;
ssize_t ret = -EINVAL;
@@ -2092,7 +2096,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
goto out;
}
- mutex_lock(&q->debugfs_mutex);
+ memflags = blk_debugfs_lock(q);
bt = rcu_dereference_protected(q->blk_trace,
lockdep_is_held(&q->debugfs_mutex));
@@ -2127,7 +2131,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
}
out_unlock_bdev:
- mutex_unlock(&q->debugfs_mutex);
+ blk_debugfs_unlock(q, memflags);
out:
return ret ? ret : count;
}