summary | refs | log | tree | commit | diff
path: root/drivers/block
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2026-04-24 15:06:55 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2026-04-24 15:06:55 -0700
commit f3e3dbcea15e20f7413afd8c791a496f0b80e80b (patch)
tree c86781264f4dde34121efe316c2cf39790d9c8e2 /drivers/block
parent fa58e6e9000c1cc76a7a0c06ea3e68d728cc4247 (diff)
parent 895a9b37917d2718ef2240a7ead7458c22f1f011 (diff)
Merge tag 'block-7.1-20260424' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull block fixes from Jens Axboe:

 - Series for zloop, fixing a variety of issues

 - t10-pi code cleanup

 - Fix for a merge window regression with the bio memory allocation mask

 - Fix for a merge window regression in ublk, caused by an issue with
   the maple tree iteration code at teardown

 - ublk self tests additions

 - Zoned device pgmap fixes

 - Various little cleanups and fixes

* tag 'block-7.1-20260424' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: (21 commits)
  Revert "floppy: fix reference leak on platform_device_register() failure"
  ublk: avoid unpinning pages under maple tree spinlock
  ublk: refactor common helper ublk_shmem_remove_ranges()
  ublk: fix maple tree lockdep warning in ublk_buf_cleanup
  selftests: ublk: add ublk auto integrity test
  selftests: ublk: enable test_integrity_02.sh on fio 3.42
  selftests: ublk: remove unused argument to _cleanup
  block: only restrict bio allocation gfp mask asked to block
  block/blk-throttle: Add WQ_PERCPU to alloc_workqueue users
  block: Add WQ_PERCPU to alloc_workqueue users
  block: relax pgmap check in bio_add_page for compatible zone device pages
  block: add pgmap check to biovec_phys_mergeable
  floppy: fix reference leak on platform_device_register() failure
  ublk: use unchecked copy helpers for bio page data
  t10-pi: reduce ref tag code duplication
  zloop: remove irq-safe locking
  zloop: factor out zloop_mark_{full,empty} helpers
  zloop: set RQF_QUIET when completing requests on deleted devices
  zloop: improve the unaligned write pointer warning
  zloop: use vfs_truncate
  ...
Diffstat (limited to 'drivers/block')
-rw-r--r-- drivers/block/ublk_drv.c 123
-rw-r--r-- drivers/block/zloop.c 123
2 files changed, 139 insertions, 107 deletions
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 603a98a30989..8e5f3738c203 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -1319,10 +1319,18 @@ static bool ublk_copy_user_bvec(const struct bio_vec *bv, unsigned *offset,
len = bv->bv_len - *offset;
bv_buf = kmap_local_page(bv->bv_page) + bv->bv_offset + *offset;
+ /*
+ * Bio pages may originate from slab caches without a usercopy region
+ * (e.g. jbd2 frozen metadata buffers). This is the same data that
+ * the loop driver writes to its backing file — no exposure risk.
+ * The bvec length is always trusted, so the size check in
+ * check_copy_size() is not needed either. Use the unchecked
+ * helpers to avoid false positives on slab pages.
+ */
if (dir == ITER_DEST)
- copied = copy_to_iter(bv_buf, len, uiter);
+ copied = _copy_to_iter(bv_buf, len, uiter);
else
- copied = copy_from_iter(bv_buf, len, uiter);
+ copied = _copy_from_iter(bv_buf, len, uiter);
kunmap_local(bv_buf);
@@ -5413,39 +5421,88 @@ err_free_pages:
return ret;
}
-static int __ublk_ctrl_unreg_buf(struct ublk_device *ub, int buf_index)
+static void ublk_unpin_range_pages(unsigned long base_pfn,
+ unsigned long nr_pages)
+{
+#define UBLK_UNPIN_BATCH 32
+ struct page *pages[UBLK_UNPIN_BATCH];
+ unsigned long off;
+
+ for (off = 0; off < nr_pages; ) {
+ unsigned int batch = min_t(unsigned long,
+ nr_pages - off, UBLK_UNPIN_BATCH);
+ unsigned int j;
+
+ for (j = 0; j < batch; j++)
+ pages[j] = pfn_to_page(base_pfn + off + j);
+ unpin_user_pages(pages, batch);
+ off += batch;
+ }
+}
+
+/*
+ * Inner loop: erase up to UBLK_REMOVE_BATCH matching ranges under
+ * mas_lock, collecting them into an xarray. Then drop the lock and
+ * unpin pages + free ranges outside spinlock context.
+ *
+ * Returns true if the tree walk completed, false if more ranges remain.
+ * Xarray key is the base PFN, value encodes nr_pages via xa_mk_value().
+ */
+#define UBLK_REMOVE_BATCH 64
+
+static bool __ublk_shmem_remove_ranges(struct ublk_device *ub,
+ int buf_index, int *ret)
{
MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX);
struct ublk_buf_range *range;
- struct page *pages[32];
- int ret = -ENOENT;
+ struct xarray to_unpin;
+ unsigned long idx;
+ unsigned int count = 0;
+ bool done = false;
+ void *entry;
+
+ xa_init(&to_unpin);
mas_lock(&mas);
mas_for_each(&mas, range, ULONG_MAX) {
- unsigned long base, nr, off;
+ unsigned long nr;
- if (range->buf_index != buf_index)
+ if (buf_index >= 0 && range->buf_index != buf_index)
continue;
- ret = 0;
- base = mas.index;
- nr = mas.last - base + 1;
+ *ret = 0;
+ nr = mas.last - mas.index + 1;
+ if (xa_err(xa_store(&to_unpin, mas.index,
+ xa_mk_value(nr), GFP_ATOMIC)))
+ goto unlock;
mas_erase(&mas);
-
- for (off = 0; off < nr; ) {
- unsigned int batch = min_t(unsigned long,
- nr - off, 32);
- unsigned int j;
-
- for (j = 0; j < batch; j++)
- pages[j] = pfn_to_page(base + off + j);
- unpin_user_pages(pages, batch);
- off += batch;
- }
kfree(range);
+ if (++count >= UBLK_REMOVE_BATCH)
+ goto unlock;
}
+ done = true;
+unlock:
mas_unlock(&mas);
+ xa_for_each(&to_unpin, idx, entry)
+ ublk_unpin_range_pages(idx, xa_to_value(entry));
+ xa_destroy(&to_unpin);
+
+ return done;
+}
+
+/*
+ * Remove ranges from the maple tree matching buf_index, unpin pages
+ * and free range structs. If buf_index < 0, remove all ranges.
+ * Processes ranges in batches to avoid holding the maple tree spinlock
+ * across potentially expensive page unpinning.
+ */
+static int ublk_shmem_remove_ranges(struct ublk_device *ub, int buf_index)
+{
+ int ret = -ENOENT;
+
+ while (!__ublk_shmem_remove_ranges(ub, buf_index, &ret))
+ cond_resched();
return ret;
}
@@ -5464,7 +5521,7 @@ static int ublk_ctrl_unreg_buf(struct ublk_device *ub,
memflags = ublk_lock_buf_tree(ub);
- ret = __ublk_ctrl_unreg_buf(ub, index);
+ ret = ublk_shmem_remove_ranges(ub, index);
if (!ret)
ida_free(&ub->buf_ida, index);
@@ -5474,27 +5531,7 @@ static int ublk_ctrl_unreg_buf(struct ublk_device *ub,
static void ublk_buf_cleanup(struct ublk_device *ub)
{
- MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX);
- struct ublk_buf_range *range;
- struct page *pages[32];
-
- mas_for_each(&mas, range, ULONG_MAX) {
- unsigned long base = mas.index;
- unsigned long nr = mas.last - base + 1;
- unsigned long off;
-
- for (off = 0; off < nr; ) {
- unsigned int batch = min_t(unsigned long,
- nr - off, 32);
- unsigned int j;
-
- for (j = 0; j < batch; j++)
- pages[j] = pfn_to_page(base + off + j);
- unpin_user_pages(pages, batch);
- off += batch;
- }
- kfree(range);
- }
+ ublk_shmem_remove_ranges(ub, -1);
mtree_destroy(&ub->buf_tree);
ida_destroy(&ub->buf_ida);
}
diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c
index 8baf642037fd..55eeb6aac0ea 100644
--- a/drivers/block/zloop.c
+++ b/drivers/block/zloop.c
@@ -288,12 +288,29 @@ static bool zloop_do_open_zone(struct zloop_device *zlo,
}
}
+static void zloop_mark_full(struct zloop_device *zlo, struct zloop_zone *zone)
+{
+ lockdep_assert_held(&zone->wp_lock);
+
+ zloop_lru_remove_open_zone(zlo, zone);
+ zone->cond = BLK_ZONE_COND_FULL;
+ zone->wp = ULLONG_MAX;
+}
+
+static void zloop_mark_empty(struct zloop_device *zlo, struct zloop_zone *zone)
+{
+ lockdep_assert_held(&zone->wp_lock);
+
+ zloop_lru_remove_open_zone(zlo, zone);
+ zone->cond = BLK_ZONE_COND_EMPTY;
+ zone->wp = zone->start;
+}
+
static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no)
{
struct zloop_zone *zone = &zlo->zones[zone_no];
struct kstat stat;
sector_t file_sectors;
- unsigned long flags;
int ret;
lockdep_assert_held(&zone->lock);
@@ -313,28 +330,24 @@ static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no)
return -EINVAL;
}
- if (file_sectors & ((zlo->block_size >> SECTOR_SHIFT) - 1)) {
- pr_err("Zone %u file size not aligned to block size %u\n",
- zone_no, zlo->block_size);
+ if (!IS_ALIGNED(stat.size, zlo->block_size)) {
+ pr_err("Zone %u file size (%llu) not aligned to block size %u\n",
+ zone_no, stat.size, zlo->block_size);
return -EINVAL;
}
- spin_lock_irqsave(&zone->wp_lock, flags);
+ spin_lock(&zone->wp_lock);
if (!file_sectors) {
- zloop_lru_remove_open_zone(zlo, zone);
- zone->cond = BLK_ZONE_COND_EMPTY;
- zone->wp = zone->start;
+ zloop_mark_empty(zlo, zone);
} else if (file_sectors == zlo->zone_capacity) {
- zloop_lru_remove_open_zone(zlo, zone);
- zone->cond = BLK_ZONE_COND_FULL;
- zone->wp = ULLONG_MAX;
+ zloop_mark_full(zlo, zone);
} else {
if (zone->cond != BLK_ZONE_COND_IMP_OPEN &&
zone->cond != BLK_ZONE_COND_EXP_OPEN)
zone->cond = BLK_ZONE_COND_CLOSED;
zone->wp = zone->start + file_sectors;
}
- spin_unlock_irqrestore(&zone->wp_lock, flags);
+ spin_unlock(&zone->wp_lock);
return 0;
}
@@ -367,7 +380,6 @@ unlock:
static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no)
{
struct zloop_zone *zone = &zlo->zones[zone_no];
- unsigned long flags;
int ret = 0;
if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
@@ -386,13 +398,13 @@ static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no)
break;
case BLK_ZONE_COND_IMP_OPEN:
case BLK_ZONE_COND_EXP_OPEN:
- spin_lock_irqsave(&zone->wp_lock, flags);
+ spin_lock(&zone->wp_lock);
zloop_lru_remove_open_zone(zlo, zone);
if (zone->wp == zone->start)
zone->cond = BLK_ZONE_COND_EMPTY;
else
zone->cond = BLK_ZONE_COND_CLOSED;
- spin_unlock_irqrestore(&zone->wp_lock, flags);
+ spin_unlock(&zone->wp_lock);
break;
case BLK_ZONE_COND_EMPTY:
case BLK_ZONE_COND_FULL:
@@ -410,7 +422,6 @@ unlock:
static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no)
{
struct zloop_zone *zone = &zlo->zones[zone_no];
- unsigned long flags;
int ret = 0;
if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
@@ -428,12 +439,10 @@ static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no)
goto unlock;
}
- spin_lock_irqsave(&zone->wp_lock, flags);
- zloop_lru_remove_open_zone(zlo, zone);
- zone->cond = BLK_ZONE_COND_EMPTY;
- zone->wp = zone->start;
+ spin_lock(&zone->wp_lock);
+ zloop_mark_empty(zlo, zone);
clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
- spin_unlock_irqrestore(&zone->wp_lock, flags);
+ spin_unlock(&zone->wp_lock);
unlock:
mutex_unlock(&zone->lock);
@@ -458,7 +467,6 @@ static int zloop_reset_all_zones(struct zloop_device *zlo)
static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no)
{
struct zloop_zone *zone = &zlo->zones[zone_no];
- unsigned long flags;
int ret = 0;
if (test_bit(ZLOOP_ZONE_CONV, &zone->flags))
@@ -476,12 +484,10 @@ static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no)
goto unlock;
}
- spin_lock_irqsave(&zone->wp_lock, flags);
- zloop_lru_remove_open_zone(zlo, zone);
- zone->cond = BLK_ZONE_COND_FULL;
- zone->wp = ULLONG_MAX;
+ spin_lock(&zone->wp_lock);
+ zloop_mark_full(zlo, zone);
clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
- spin_unlock_irqrestore(&zone->wp_lock, flags);
+ spin_unlock(&zone->wp_lock);
unlock:
mutex_unlock(&zone->lock);
@@ -571,10 +577,9 @@ static int zloop_seq_write_prep(struct zloop_cmd *cmd)
bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND;
struct zloop_zone *zone = &zlo->zones[zone_no];
sector_t zone_end = zone->start + zlo->zone_capacity;
- unsigned long flags;
int ret = 0;
- spin_lock_irqsave(&zone->wp_lock, flags);
+ spin_lock(&zone->wp_lock);
/*
* Zone append operations always go at the current write pointer, but
@@ -616,14 +621,11 @@ static int zloop_seq_write_prep(struct zloop_cmd *cmd)
*/
if (!is_append || !zlo->ordered_zone_append) {
zone->wp += nr_sectors;
- if (zone->wp == zone_end) {
- zloop_lru_remove_open_zone(zlo, zone);
- zone->cond = BLK_ZONE_COND_FULL;
- zone->wp = ULLONG_MAX;
- }
+ if (zone->wp == zone_end)
+ zloop_mark_full(zlo, zone);
}
out_unlock:
- spin_unlock_irqrestore(&zone->wp_lock, flags);
+ spin_unlock(&zone->wp_lock);
return ret;
}
@@ -861,25 +863,21 @@ static bool zloop_set_zone_append_sector(struct request *rq)
struct zloop_zone *zone = &zlo->zones[zone_no];
sector_t zone_end = zone->start + zlo->zone_capacity;
sector_t nr_sectors = blk_rq_sectors(rq);
- unsigned long flags;
- spin_lock_irqsave(&zone->wp_lock, flags);
+ spin_lock(&zone->wp_lock);
if (zone->cond == BLK_ZONE_COND_FULL ||
zone->wp + nr_sectors > zone_end) {
- spin_unlock_irqrestore(&zone->wp_lock, flags);
+ spin_unlock(&zone->wp_lock);
return false;
}
rq->__sector = zone->wp;
zone->wp += blk_rq_sectors(rq);
- if (zone->wp >= zone_end) {
- zloop_lru_remove_open_zone(zlo, zone);
- zone->cond = BLK_ZONE_COND_FULL;
- zone->wp = ULLONG_MAX;
- }
+ if (zone->wp >= zone_end)
+ zloop_mark_full(zlo, zone);
- spin_unlock_irqrestore(&zone->wp_lock, flags);
+ spin_unlock(&zone->wp_lock);
return true;
}
@@ -891,8 +889,10 @@ static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx,
struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq);
struct zloop_device *zlo = rq->q->queuedata;
- if (data_race(READ_ONCE(zlo->state)) == Zlo_deleting)
+ if (data_race(READ_ONCE(zlo->state)) == Zlo_deleting) {
+ rq->rq_flags |= RQF_QUIET;
return BLK_STS_IOERR;
+ }
/*
* If we need to strongly order zone append operations, set the request
@@ -938,7 +938,6 @@ static int zloop_report_zones(struct gendisk *disk, sector_t sector,
struct zloop_device *zlo = disk->private_data;
struct blk_zone blkz = {};
unsigned int first, i;
- unsigned long flags;
int ret;
first = disk_zone_no(disk, sector);
@@ -962,9 +961,9 @@ static int zloop_report_zones(struct gendisk *disk, sector_t sector,
blkz.start = zone->start;
blkz.len = zlo->zone_size;
- spin_lock_irqsave(&zone->wp_lock, flags);
+ spin_lock(&zone->wp_lock);
blkz.wp = zone->wp;
- spin_unlock_irqrestore(&zone->wp_lock, flags);
+ spin_unlock(&zone->wp_lock);
blkz.cond = zone->cond;
if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) {
blkz.type = BLK_ZONE_TYPE_CONVENTIONAL;
@@ -1363,20 +1362,6 @@ out:
return ret;
}
-static void zloop_truncate(struct file *file, loff_t pos)
-{
- struct mnt_idmap *idmap = file_mnt_idmap(file);
- struct dentry *dentry = file_dentry(file);
- struct iattr newattrs;
-
- newattrs.ia_size = pos;
- newattrs.ia_valid = ATTR_SIZE;
-
- inode_lock(dentry->d_inode);
- notify_change(idmap, dentry, &newattrs, NULL);
- inode_unlock(dentry->d_inode);
-}
-
static void zloop_forget_cache(struct zloop_device *zlo)
{
unsigned int i;
@@ -1401,8 +1386,18 @@ static void zloop_forget_cache(struct zloop_device *zlo)
zlo->disk->part0, ret);
continue;
}
- if (old_wp < zone->wp)
- zloop_truncate(file, old_wp);
+
+ if (old_wp > zone->wp)
+ continue;
+ /*
+ * This should not happen: if we recorded a full zone, it can't
+ * be active.
+ */
+ if (WARN_ON_ONCE(old_wp == ULLONG_MAX))
+ continue;
+
+ vfs_truncate(&file->f_path,
+ (old_wp - zone->start) << SECTOR_SHIFT);
}
}