diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-24 15:06:55 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-24 15:06:55 -0700 |
| commit | f3e3dbcea15e20f7413afd8c791a496f0b80e80b (patch) | |
| tree | c86781264f4dde34121efe316c2cf39790d9c8e2 /drivers/block | |
| parent | fa58e6e9000c1cc76a7a0c06ea3e68d728cc4247 (diff) | |
| parent | 895a9b37917d2718ef2240a7ead7458c22f1f011 (diff) | |
Merge tag 'block-7.1-20260424' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull block fixes from Jens Axboe:
- Series for zloop, fixing a variety of issues
- t10-pi code cleanup
- Fix for a merge window regression with the bio memory allocation mask
- Fix for a merge window regression in ublk, caused by an issue with
the maple tree iteration code at teardown
- ublk self tests additions
- Zoned device pgmap fixes
- Various little cleanups and fixes
* tag 'block-7.1-20260424' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: (21 commits)
Revert "floppy: fix reference leak on platform_device_register() failure"
ublk: avoid unpinning pages under maple tree spinlock
ublk: refactor common helper ublk_shmem_remove_ranges()
ublk: fix maple tree lockdep warning in ublk_buf_cleanup
selftests: ublk: add ublk auto integrity test
selftests: ublk: enable test_integrity_02.sh on fio 3.42
selftests: ublk: remove unused argument to _cleanup
block: only restrict bio allocation gfp mask asked to block
block/blk-throttle: Add WQ_PERCPU to alloc_workqueue users
block: Add WQ_PERCPU to alloc_workqueue users
block: relax pgmap check in bio_add_page for compatible zone device pages
block: add pgmap check to biovec_phys_mergeable
floppy: fix reference leak on platform_device_register() failure
ublk: use unchecked copy helpers for bio page data
t10-pi: reduce ref tag code duplication
zloop: remove irq-safe locking
zloop: factor out zloop_mark_{full,empty} helpers
zloop: set RQF_QUIET when completing requests on deleted devices
zloop: improve the unaligned write pointer warning
zloop: use vfs_truncate
...
Diffstat (limited to 'drivers/block')
| -rw-r--r-- | drivers/block/ublk_drv.c | 123 | ||||
| -rw-r--r-- | drivers/block/zloop.c | 123 |
2 files changed, 139 insertions, 107 deletions
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 603a98a30989..8e5f3738c203 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1319,10 +1319,18 @@ static bool ublk_copy_user_bvec(const struct bio_vec *bv, unsigned *offset, len = bv->bv_len - *offset; bv_buf = kmap_local_page(bv->bv_page) + bv->bv_offset + *offset; + /* + * Bio pages may originate from slab caches without a usercopy region + * (e.g. jbd2 frozen metadata buffers). This is the same data that + * the loop driver writes to its backing file — no exposure risk. + * The bvec length is always trusted, so the size check in + * check_copy_size() is not needed either. Use the unchecked + * helpers to avoid false positives on slab pages. + */ if (dir == ITER_DEST) - copied = copy_to_iter(bv_buf, len, uiter); + copied = _copy_to_iter(bv_buf, len, uiter); else - copied = copy_from_iter(bv_buf, len, uiter); + copied = _copy_from_iter(bv_buf, len, uiter); kunmap_local(bv_buf); @@ -5413,39 +5421,88 @@ err_free_pages: return ret; } -static int __ublk_ctrl_unreg_buf(struct ublk_device *ub, int buf_index) +static void ublk_unpin_range_pages(unsigned long base_pfn, + unsigned long nr_pages) +{ +#define UBLK_UNPIN_BATCH 32 + struct page *pages[UBLK_UNPIN_BATCH]; + unsigned long off; + + for (off = 0; off < nr_pages; ) { + unsigned int batch = min_t(unsigned long, + nr_pages - off, UBLK_UNPIN_BATCH); + unsigned int j; + + for (j = 0; j < batch; j++) + pages[j] = pfn_to_page(base_pfn + off + j); + unpin_user_pages(pages, batch); + off += batch; + } +} + +/* + * Inner loop: erase up to UBLK_REMOVE_BATCH matching ranges under + * mas_lock, collecting them into an xarray. Then drop the lock and + * unpin pages + free ranges outside spinlock context. + * + * Returns true if the tree walk completed, false if more ranges remain. + * Xarray key is the base PFN, value encodes nr_pages via xa_mk_value(). 
+ */ +#define UBLK_REMOVE_BATCH 64 + +static bool __ublk_shmem_remove_ranges(struct ublk_device *ub, + int buf_index, int *ret) { MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX); struct ublk_buf_range *range; - struct page *pages[32]; - int ret = -ENOENT; + struct xarray to_unpin; + unsigned long idx; + unsigned int count = 0; + bool done = false; + void *entry; + + xa_init(&to_unpin); mas_lock(&mas); mas_for_each(&mas, range, ULONG_MAX) { - unsigned long base, nr, off; + unsigned long nr; - if (range->buf_index != buf_index) + if (buf_index >= 0 && range->buf_index != buf_index) continue; - ret = 0; - base = mas.index; - nr = mas.last - base + 1; + *ret = 0; + nr = mas.last - mas.index + 1; + if (xa_err(xa_store(&to_unpin, mas.index, + xa_mk_value(nr), GFP_ATOMIC))) + goto unlock; mas_erase(&mas); - - for (off = 0; off < nr; ) { - unsigned int batch = min_t(unsigned long, - nr - off, 32); - unsigned int j; - - for (j = 0; j < batch; j++) - pages[j] = pfn_to_page(base + off + j); - unpin_user_pages(pages, batch); - off += batch; - } kfree(range); + if (++count >= UBLK_REMOVE_BATCH) + goto unlock; } + done = true; +unlock: mas_unlock(&mas); + xa_for_each(&to_unpin, idx, entry) + ublk_unpin_range_pages(idx, xa_to_value(entry)); + xa_destroy(&to_unpin); + + return done; +} + +/* + * Remove ranges from the maple tree matching buf_index, unpin pages + * and free range structs. If buf_index < 0, remove all ranges. + * Processes ranges in batches to avoid holding the maple tree spinlock + * across potentially expensive page unpinning. 
+ */ +static int ublk_shmem_remove_ranges(struct ublk_device *ub, int buf_index) +{ + int ret = -ENOENT; + + while (!__ublk_shmem_remove_ranges(ub, buf_index, &ret)) + cond_resched(); return ret; } @@ -5464,7 +5521,7 @@ static int ublk_ctrl_unreg_buf(struct ublk_device *ub, memflags = ublk_lock_buf_tree(ub); - ret = __ublk_ctrl_unreg_buf(ub, index); + ret = ublk_shmem_remove_ranges(ub, index); if (!ret) ida_free(&ub->buf_ida, index); @@ -5474,27 +5531,7 @@ static int ublk_ctrl_unreg_buf(struct ublk_device *ub, static void ublk_buf_cleanup(struct ublk_device *ub) { - MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX); - struct ublk_buf_range *range; - struct page *pages[32]; - - mas_for_each(&mas, range, ULONG_MAX) { - unsigned long base = mas.index; - unsigned long nr = mas.last - base + 1; - unsigned long off; - - for (off = 0; off < nr; ) { - unsigned int batch = min_t(unsigned long, - nr - off, 32); - unsigned int j; - - for (j = 0; j < batch; j++) - pages[j] = pfn_to_page(base + off + j); - unpin_user_pages(pages, batch); - off += batch; - } - kfree(range); - } + ublk_shmem_remove_ranges(ub, -1); mtree_destroy(&ub->buf_tree); ida_destroy(&ub->buf_ida); } diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c index 8baf642037fd..55eeb6aac0ea 100644 --- a/drivers/block/zloop.c +++ b/drivers/block/zloop.c @@ -288,12 +288,29 @@ static bool zloop_do_open_zone(struct zloop_device *zlo, } } +static void zloop_mark_full(struct zloop_device *zlo, struct zloop_zone *zone) +{ + lockdep_assert_held(&zone->wp_lock); + + zloop_lru_remove_open_zone(zlo, zone); + zone->cond = BLK_ZONE_COND_FULL; + zone->wp = ULLONG_MAX; +} + +static void zloop_mark_empty(struct zloop_device *zlo, struct zloop_zone *zone) +{ + lockdep_assert_held(&zone->wp_lock); + + zloop_lru_remove_open_zone(zlo, zone); + zone->cond = BLK_ZONE_COND_EMPTY; + zone->wp = zone->start; +} + static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no) { struct zloop_zone *zone = 
&zlo->zones[zone_no]; struct kstat stat; sector_t file_sectors; - unsigned long flags; int ret; lockdep_assert_held(&zone->lock); @@ -313,28 +330,24 @@ static int zloop_update_seq_zone(struct zloop_device *zlo, unsigned int zone_no) return -EINVAL; } - if (file_sectors & ((zlo->block_size >> SECTOR_SHIFT) - 1)) { - pr_err("Zone %u file size not aligned to block size %u\n", - zone_no, zlo->block_size); + if (!IS_ALIGNED(stat.size, zlo->block_size)) { + pr_err("Zone %u file size (%llu) not aligned to block size %u\n", + zone_no, stat.size, zlo->block_size); return -EINVAL; } - spin_lock_irqsave(&zone->wp_lock, flags); + spin_lock(&zone->wp_lock); if (!file_sectors) { - zloop_lru_remove_open_zone(zlo, zone); - zone->cond = BLK_ZONE_COND_EMPTY; - zone->wp = zone->start; + zloop_mark_empty(zlo, zone); } else if (file_sectors == zlo->zone_capacity) { - zloop_lru_remove_open_zone(zlo, zone); - zone->cond = BLK_ZONE_COND_FULL; - zone->wp = ULLONG_MAX; + zloop_mark_full(zlo, zone); } else { if (zone->cond != BLK_ZONE_COND_IMP_OPEN && zone->cond != BLK_ZONE_COND_EXP_OPEN) zone->cond = BLK_ZONE_COND_CLOSED; zone->wp = zone->start + file_sectors; } - spin_unlock_irqrestore(&zone->wp_lock, flags); + spin_unlock(&zone->wp_lock); return 0; } @@ -367,7 +380,6 @@ unlock: static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no) { struct zloop_zone *zone = &zlo->zones[zone_no]; - unsigned long flags; int ret = 0; if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) @@ -386,13 +398,13 @@ static int zloop_close_zone(struct zloop_device *zlo, unsigned int zone_no) break; case BLK_ZONE_COND_IMP_OPEN: case BLK_ZONE_COND_EXP_OPEN: - spin_lock_irqsave(&zone->wp_lock, flags); + spin_lock(&zone->wp_lock); zloop_lru_remove_open_zone(zlo, zone); if (zone->wp == zone->start) zone->cond = BLK_ZONE_COND_EMPTY; else zone->cond = BLK_ZONE_COND_CLOSED; - spin_unlock_irqrestore(&zone->wp_lock, flags); + spin_unlock(&zone->wp_lock); break; case BLK_ZONE_COND_EMPTY: case BLK_ZONE_COND_FULL: 
@@ -410,7 +422,6 @@ unlock: static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no) { struct zloop_zone *zone = &zlo->zones[zone_no]; - unsigned long flags; int ret = 0; if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) @@ -428,12 +439,10 @@ static int zloop_reset_zone(struct zloop_device *zlo, unsigned int zone_no) goto unlock; } - spin_lock_irqsave(&zone->wp_lock, flags); - zloop_lru_remove_open_zone(zlo, zone); - zone->cond = BLK_ZONE_COND_EMPTY; - zone->wp = zone->start; + spin_lock(&zone->wp_lock); + zloop_mark_empty(zlo, zone); clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); - spin_unlock_irqrestore(&zone->wp_lock, flags); + spin_unlock(&zone->wp_lock); unlock: mutex_unlock(&zone->lock); @@ -458,7 +467,6 @@ static int zloop_reset_all_zones(struct zloop_device *zlo) static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no) { struct zloop_zone *zone = &zlo->zones[zone_no]; - unsigned long flags; int ret = 0; if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) @@ -476,12 +484,10 @@ static int zloop_finish_zone(struct zloop_device *zlo, unsigned int zone_no) goto unlock; } - spin_lock_irqsave(&zone->wp_lock, flags); - zloop_lru_remove_open_zone(zlo, zone); - zone->cond = BLK_ZONE_COND_FULL; - zone->wp = ULLONG_MAX; + spin_lock(&zone->wp_lock); + zloop_mark_full(zlo, zone); clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); - spin_unlock_irqrestore(&zone->wp_lock, flags); + spin_unlock(&zone->wp_lock); unlock: mutex_unlock(&zone->lock); @@ -571,10 +577,9 @@ static int zloop_seq_write_prep(struct zloop_cmd *cmd) bool is_append = req_op(rq) == REQ_OP_ZONE_APPEND; struct zloop_zone *zone = &zlo->zones[zone_no]; sector_t zone_end = zone->start + zlo->zone_capacity; - unsigned long flags; int ret = 0; - spin_lock_irqsave(&zone->wp_lock, flags); + spin_lock(&zone->wp_lock); /* * Zone append operations always go at the current write pointer, but @@ -616,14 +621,11 @@ static int zloop_seq_write_prep(struct zloop_cmd *cmd) */ if (!is_append || 
!zlo->ordered_zone_append) { zone->wp += nr_sectors; - if (zone->wp == zone_end) { - zloop_lru_remove_open_zone(zlo, zone); - zone->cond = BLK_ZONE_COND_FULL; - zone->wp = ULLONG_MAX; - } + if (zone->wp == zone_end) + zloop_mark_full(zlo, zone); } out_unlock: - spin_unlock_irqrestore(&zone->wp_lock, flags); + spin_unlock(&zone->wp_lock); return ret; } @@ -861,25 +863,21 @@ static bool zloop_set_zone_append_sector(struct request *rq) struct zloop_zone *zone = &zlo->zones[zone_no]; sector_t zone_end = zone->start + zlo->zone_capacity; sector_t nr_sectors = blk_rq_sectors(rq); - unsigned long flags; - spin_lock_irqsave(&zone->wp_lock, flags); + spin_lock(&zone->wp_lock); if (zone->cond == BLK_ZONE_COND_FULL || zone->wp + nr_sectors > zone_end) { - spin_unlock_irqrestore(&zone->wp_lock, flags); + spin_unlock(&zone->wp_lock); return false; } rq->__sector = zone->wp; zone->wp += blk_rq_sectors(rq); - if (zone->wp >= zone_end) { - zloop_lru_remove_open_zone(zlo, zone); - zone->cond = BLK_ZONE_COND_FULL; - zone->wp = ULLONG_MAX; - } + if (zone->wp >= zone_end) + zloop_mark_full(zlo, zone); - spin_unlock_irqrestore(&zone->wp_lock, flags); + spin_unlock(&zone->wp_lock); return true; } @@ -891,8 +889,10 @@ static blk_status_t zloop_queue_rq(struct blk_mq_hw_ctx *hctx, struct zloop_cmd *cmd = blk_mq_rq_to_pdu(rq); struct zloop_device *zlo = rq->q->queuedata; - if (data_race(READ_ONCE(zlo->state)) == Zlo_deleting) + if (data_race(READ_ONCE(zlo->state)) == Zlo_deleting) { + rq->rq_flags |= RQF_QUIET; return BLK_STS_IOERR; + } /* * If we need to strongly order zone append operations, set the request @@ -938,7 +938,6 @@ static int zloop_report_zones(struct gendisk *disk, sector_t sector, struct zloop_device *zlo = disk->private_data; struct blk_zone blkz = {}; unsigned int first, i; - unsigned long flags; int ret; first = disk_zone_no(disk, sector); @@ -962,9 +961,9 @@ static int zloop_report_zones(struct gendisk *disk, sector_t sector, blkz.start = zone->start; blkz.len = 
zlo->zone_size; - spin_lock_irqsave(&zone->wp_lock, flags); + spin_lock(&zone->wp_lock); blkz.wp = zone->wp; - spin_unlock_irqrestore(&zone->wp_lock, flags); + spin_unlock(&zone->wp_lock); blkz.cond = zone->cond; if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) { blkz.type = BLK_ZONE_TYPE_CONVENTIONAL; @@ -1363,20 +1362,6 @@ out: return ret; } -static void zloop_truncate(struct file *file, loff_t pos) -{ - struct mnt_idmap *idmap = file_mnt_idmap(file); - struct dentry *dentry = file_dentry(file); - struct iattr newattrs; - - newattrs.ia_size = pos; - newattrs.ia_valid = ATTR_SIZE; - - inode_lock(dentry->d_inode); - notify_change(idmap, dentry, &newattrs, NULL); - inode_unlock(dentry->d_inode); -} - static void zloop_forget_cache(struct zloop_device *zlo) { unsigned int i; @@ -1401,8 +1386,18 @@ static void zloop_forget_cache(struct zloop_device *zlo) zlo->disk->part0, ret); continue; } - if (old_wp < zone->wp) - zloop_truncate(file, old_wp); + + if (old_wp > zone->wp) + continue; + /* + * This should not happen, if we recorded a full zone, it can't + * be active. + */ + if (WARN_ON_ONCE(old_wp == ULLONG_MAX)) + continue; + + vfs_truncate(&file->f_path, + (old_wp - zone->start) << SECTOR_SHIFT); } } |
