summaryrefslogtreecommitdiff
path: root/drivers/block
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-10-02 10:16:56 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-10-02 10:16:56 -0700
commite1b1d03ceec343362524318c076b110066ffe305 (patch)
treeca71105f13d893118eab4feb826d20cca066c53d /drivers/block
parent5832d26433f2bd0d28f8b12526e3c2fdb203507f (diff)
parent130e6de62107116eba124647116276266be0f84c (diff)
Merge tag 'for-6.18/block-20250929' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull block updates from Jens Axboe: - NVMe pull request via Keith: - FC target fixes (Daniel) - Authentication fixes and updates (Martin, Chris) - Admin controller handling (Kamaljit) - Target lockdep assertions (Max) - Keep-alive updates for discovery (Alastair) - Suspend quirk (Georg) - MD pull request via Yu: - Add support for a lockless bitmap. A key feature for the new bitmap are that the IO fastpath is lockless. If a user issues lots of write IO to the same bitmap bit in a short time, only the first write has additional overhead to update bitmap bit, no additional overhead for the following writes. By supporting only resync or recover written data, means in the case creating new array or replacing with a new disk, there is no need to do a full disk resync/recovery. - Switch ->getgeo() and ->bios_param() to using struct gendisk rather than struct block_device. - Rust block changes via Andreas. This series adds configuration via configfs and remote completion to the rnull driver. The series also includes a set of changes to the rust block device driver API: a few cleanup patches, and a few features supporting the rnull changes. The series removes the raw buffer formatting logic from `kernel::block` and improves the logic available in `kernel::string` to support the same use as the removed logic. - floppy arch cleanups - Reduce the number of dereferencing needed for ublk commands - Restrict supported sockets for nbd. Mostly done to eliminate a class of issues perpetually reported by syzbot, by using nonsensical socket setups. - A few s390 dasd block fixes - Fix a few issues around atomic writes - Improve DMA interation for integrity requests - Improve how iovecs are treated with regards to O_DIRECT aligment constraints. We used to require each segment to adhere to the constraints, now only the request as a whole needs to. - Clean up and improve p2p support, enabling use of p2p for metadata payloads - Improve locking of request lookup, using SRCU where appropriate - Use page references properly for brd, avoiding very long RCU sections - Fix ordering of recursively submitted IOs - Clean up and improve updating nr_requests for a live device - Various fixes and cleanups * tag 'for-6.18/block-20250929' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: (164 commits) s390/dasd: enforce dma_alignment to ensure proper buffer validation s390/dasd: Return BLK_STS_INVAL for EINVAL from do_dasd_request ublk: remove redundant zone op check in ublk_setup_iod() nvme: Use non zero KATO for persistent discovery connections nvmet: add safety check for subsys lock nvme-core: use nvme_is_io_ctrl() for I/O controller check nvme-core: do ioccsz/iorcsz validation only for I/O controllers nvme-core: add method to check for an I/O controller blk-cgroup: fix possible deadlock while configuring policy blk-mq: fix null-ptr-deref in blk_mq_free_tags() from error path blk-mq: Fix more tag iteration function documentation selftests: ublk: fix behavior when fio is not installed ublk: don't access ublk_queue in ublk_unmap_io() ublk: pass ublk_io to __ublk_complete_rq() ublk: don't access ublk_queue in ublk_need_complete_req() ublk: don't access ublk_queue in ublk_check_commit_and_fetch() ublk: don't pass ublk_queue to ublk_fetch() ublk: don't access ublk_queue in ublk_config_io_buf() ublk: don't access ublk_queue in ublk_check_fetch_buf() ublk: pass q_id and tag to __ublk_check_and_get_req() ...
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/Kconfig10
-rw-r--r--drivers/block/Makefile4
-rw-r--r--drivers/block/amiflop.c10
-rw-r--r--drivers/block/aoe/aoeblk.c4
-rw-r--r--drivers/block/aoe/aoemain.c2
-rw-r--r--drivers/block/brd.c75
-rw-r--r--drivers/block/floppy.c59
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c6
-rw-r--r--drivers/block/nbd.c10
-rw-r--r--drivers/block/null_blk/main.c2
-rw-r--r--drivers/block/rbd.c2
-rw-r--r--drivers/block/rnbd/rnbd-clt.c6
-rw-r--r--drivers/block/rnull.rs80
-rw-r--r--drivers/block/rnull/Kconfig13
-rw-r--r--drivers/block/rnull/Makefile3
-rw-r--r--drivers/block/rnull/configfs.rs262
-rw-r--r--drivers/block/rnull/rnull.rs104
-rw-r--r--drivers/block/sunvdc.c7
-rw-r--r--drivers/block/swim.c4
-rw-r--r--drivers/block/ublk_drv.c236
-rw-r--r--drivers/block/virtio_blk.c8
-rw-r--r--drivers/block/xen-blkfront.c4
-rw-r--r--drivers/block/zram/zram_drv.c2
23 files changed, 617 insertions, 296 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index df38fb364904..77d694448990 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -17,6 +17,7 @@ menuconfig BLK_DEV
if BLK_DEV
source "drivers/block/null_blk/Kconfig"
+source "drivers/block/rnull/Kconfig"
config BLK_DEV_FD
tristate "Normal floppy disk support"
@@ -311,15 +312,6 @@ config VIRTIO_BLK
This is the virtual block driver for virtio. It can be used with
QEMU based VMMs (like KVM or Xen). Say Y or M.
-config BLK_DEV_RUST_NULL
- tristate "Rust null block driver (Experimental)"
- depends on RUST
- help
- This is the Rust implementation of the null block driver. For now it
- is only a minimal stub.
-
- If unsure, say N.
-
config BLK_DEV_RBD
tristate "Rados block device (RBD)"
depends on INET && BLOCK
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index a695ce74ef22..2d8096eb8cdf 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -9,9 +9,6 @@
# needed for trace events
ccflags-y += -I$(src)
-obj-$(CONFIG_BLK_DEV_RUST_NULL) += rnull_mod.o
-rnull_mod-y := rnull.o
-
obj-$(CONFIG_MAC_FLOPPY) += swim3.o
obj-$(CONFIG_BLK_DEV_SWIM) += swim_mod.o
obj-$(CONFIG_BLK_DEV_FD) += floppy.o
@@ -38,6 +35,7 @@ obj-$(CONFIG_ZRAM) += zram/
obj-$(CONFIG_BLK_DEV_RNBD) += rnbd/
obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk/
+obj-$(CONFIG_BLK_DEV_RUST_NULL) += rnull/
obj-$(CONFIG_BLK_DEV_UBLK) += ublk_drv.o
obj-$(CONFIG_BLK_DEV_ZONED_LOOP) += zloop.o
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 6357d86eafdc..2932b6653b6f 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1523,13 +1523,13 @@ static blk_status_t amiflop_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_STS_OK;
}
-static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+static int fd_getgeo(struct gendisk *disk, struct hd_geometry *geo)
{
- int drive = MINOR(bdev->bd_dev) & 3;
+ struct amiga_floppy_struct *p = disk->private_data;
- geo->heads = unit[drive].type->heads;
- geo->sectors = unit[drive].dtype->sects * unit[drive].type->sect_mult;
- geo->cylinders = unit[drive].type->tracks;
+ geo->heads = p->type->heads;
+ geo->sectors = p->dtype->sects * p->type->sect_mult;
+ geo->cylinders = p->type->tracks;
return 0;
}
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 00b74a845328..34ead75e7e02 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -269,9 +269,9 @@ static blk_status_t aoeblk_queue_rq(struct blk_mq_hw_ctx *hctx,
}
static int
-aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+aoeblk_getgeo(struct gendisk *disk, struct hd_geometry *geo)
{
- struct aoedev *d = bdev->bd_disk->private_data;
+ struct aoedev *d = disk->private_data;
if ((d->flags & DEVFL_UP) == 0) {
printk(KERN_ERR "aoe: disk not up\n");
diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c
index cdf6e4041bb9..3b21750038ee 100644
--- a/drivers/block/aoe/aoemain.c
+++ b/drivers/block/aoe/aoemain.c
@@ -44,7 +44,7 @@ aoe_init(void)
{
int ret;
- aoe_wq = alloc_workqueue("aoe_wq", 0, 0);
+ aoe_wq = alloc_workqueue("aoe_wq", WQ_PERCPU, 0);
if (!aoe_wq)
return -ENOMEM;
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 0c2eabe14af3..9778259b30d4 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -44,45 +44,74 @@ struct brd_device {
};
/*
- * Look up and return a brd's page for a given sector.
+ * Look up and return a brd's page with reference grabbed for a given sector.
*/
static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
{
- return xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
+ struct page *page;
+ XA_STATE(xas, &brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
+
+ rcu_read_lock();
+repeat:
+ page = xas_load(&xas);
+ if (xas_retry(&xas, page)) {
+ xas_reset(&xas);
+ goto repeat;
+ }
+
+ if (!page)
+ goto out;
+
+ if (!get_page_unless_zero(page)) {
+ xas_reset(&xas);
+ goto repeat;
+ }
+
+ if (unlikely(page != xas_reload(&xas))) {
+ put_page(page);
+ xas_reset(&xas);
+ goto repeat;
+ }
+out:
+ rcu_read_unlock();
+
+ return page;
}
/*
* Insert a new page for a given sector, if one does not already exist.
+ * The returned page will grab reference.
*/
static struct page *brd_insert_page(struct brd_device *brd, sector_t sector,
blk_opf_t opf)
- __releases(rcu)
- __acquires(rcu)
{
gfp_t gfp = (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO;
struct page *page, *ret;
- rcu_read_unlock();
page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
- if (!page) {
- rcu_read_lock();
+ if (!page)
return ERR_PTR(-ENOMEM);
- }
xa_lock(&brd->brd_pages);
ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL,
page, gfp);
- rcu_read_lock();
- if (ret) {
+ if (!ret) {
+ brd->brd_nr_pages++;
+ get_page(page);
+ xa_unlock(&brd->brd_pages);
+ return page;
+ }
+
+ if (!xa_is_err(ret)) {
+ get_page(ret);
xa_unlock(&brd->brd_pages);
- __free_page(page);
- if (xa_is_err(ret))
- return ERR_PTR(xa_err(ret));
+ put_page(page);
return ret;
}
- brd->brd_nr_pages++;
+
xa_unlock(&brd->brd_pages);
- return page;
+ put_page(page);
+ return ERR_PTR(xa_err(ret));
}
/*
@@ -95,7 +124,7 @@ static void brd_free_pages(struct brd_device *brd)
pgoff_t idx;
xa_for_each(&brd->brd_pages, idx, page) {
- __free_page(page);
+ put_page(page);
cond_resched();
}
@@ -117,7 +146,6 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
- rcu_read_lock();
page = brd_lookup_page(brd, sector);
if (!page && op_is_write(opf)) {
page = brd_insert_page(brd, sector, opf);
@@ -135,13 +163,13 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
memset(kaddr, 0, bv.bv_len);
}
kunmap_local(kaddr);
- rcu_read_unlock();
bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len);
+ if (page)
+ put_page(page);
return true;
out_error:
- rcu_read_unlock();
if (PTR_ERR(page) == -ENOMEM && (opf & REQ_NOWAIT))
bio_wouldblock_error(bio);
else
@@ -149,13 +177,6 @@ out_error:
return false;
}
-static void brd_free_one_page(struct rcu_head *head)
-{
- struct page *page = container_of(head, struct page, rcu_head);
-
- __free_page(page);
-}
-
static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
{
sector_t aligned_sector = round_up(sector, PAGE_SECTORS);
@@ -170,7 +191,7 @@ static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) {
page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT);
if (page) {
- call_rcu(&page->rcu_head, brd_free_one_page);
+ put_page(page);
brd->brd_nr_pages--;
}
aligned_sector += PAGE_SECTORS;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 24be0c2c4075..5336c3c5ca36 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -163,35 +163,35 @@
/* do print messages for unexpected interrupts */
static int print_unex = 1;
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/fs.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/workqueue.h>
-#include <linux/fdreg.h>
-#include <linux/fd.h>
-#include <linux/hdreg.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
+#include <linux/async.h>
#include <linux/bio.h>
-#include <linux/string.h>
-#include <linux/jiffies.h>
-#include <linux/fcntl.h>
+#include <linux/compat.h>
#include <linux/delay.h>
-#include <linux/mc146818rtc.h> /* CMOS defines */
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/fd.h>
+#include <linux/fdreg.h>
+#include <linux/fs.h>
+#include <linux/hdreg.h>
#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
#include <linux/major.h>
-#include <linux/platform_device.h>
+#include <linux/mc146818rtc.h> /* CMOS defines */
+#include <linux/mm.h>
#include <linux/mod_devicetable.h>
+#include <linux/module.h>
#include <linux/mutex.h>
-#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/timer.h>
#include <linux/uaccess.h>
-#include <linux/async.h>
-#include <linux/compat.h>
+#include <linux/workqueue.h>
/*
* PS/2 floppies have much slower step rates than regular floppies.
@@ -233,8 +233,6 @@ static unsigned short virtual_dma_port = 0x3f0;
irqreturn_t floppy_interrupt(int irq, void *dev_id);
static int set_dor(int fdc, char mask, char data);
-#define K_64 0x10000 /* 64KB */
-
/* the following is the mask of allowed drives. By default units 2 and
* 3 of both floppy controllers are disabled, because switching on the
* motor of these drives causes system hangs on some PCI computers. drive
@@ -3092,16 +3090,13 @@ static int raw_cmd_copyin(int cmd, void __user *param,
*rcmd = NULL;
loop:
- ptr = kmalloc(sizeof(struct floppy_raw_cmd), GFP_KERNEL);
- if (!ptr)
- return -ENOMEM;
+ ptr = memdup_user(param, sizeof(*ptr));
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
*rcmd = ptr;
- ret = copy_from_user(ptr, param, sizeof(*ptr));
ptr->next = NULL;
ptr->buffer_length = 0;
ptr->kernel_data = NULL;
- if (ret)
- return -EFAULT;
param += sizeof(struct floppy_raw_cmd);
if (ptr->cmd_count > FD_RAW_CMD_FULLSIZE)
return -EINVAL;
@@ -3363,9 +3358,9 @@ static int get_floppy_geometry(int drive, int type, struct floppy_struct **g)
return 0;
}
-static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+static int fd_getgeo(struct gendisk *disk, struct hd_geometry *geo)
{
- int drive = (long)bdev->bd_disk->private_data;
+ int drive = (long)disk->private_data;
int type = ITYPE(drive_state[drive].fd_device);
struct floppy_struct *g;
int ret;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 8fc7761397bd..567192e371a8 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3148,17 +3148,17 @@ static int mtip_block_compat_ioctl(struct block_device *dev,
* that each partition is also 4KB aligned. Non-aligned partitions adversely
* affects performance.
*
- * @dev Pointer to the block_device strucutre.
+ * @disk Pointer to the gendisk strucutre.
* @geo Pointer to a hd_geometry structure.
*
* return value
* 0 Operation completed successfully.
* -ENOTTY An error occurred while reading the drive capacity.
*/
-static int mtip_block_getgeo(struct block_device *dev,
+static int mtip_block_getgeo(struct gendisk *disk,
struct hd_geometry *geo)
{
- struct driver_data *dd = dev->bd_disk->private_data;
+ struct driver_data *dd = disk->private_data;
sector_t capacity;
if (!dd)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 6463d0e8d0ce..1188f32a5e5e 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -311,7 +311,7 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
if (args) {
INIT_WORK(&args->work, nbd_dead_link_work);
args->index = nbd->index;
- queue_work(system_wq, &args->work);
+ queue_work(system_percpu_wq, &args->work);
}
}
if (!nsock->dead) {
@@ -1217,6 +1217,14 @@ static struct socket *nbd_get_socket(struct nbd_device *nbd, unsigned long fd,
if (!sock)
return NULL;
+ if (!sk_is_tcp(sock->sk) &&
+ !sk_is_stream_unix(sock->sk)) {
+ dev_err(disk_to_dev(nbd->disk), "Unsupported socket: should be TCP or UNIX.\n");
+ *err = -EINVAL;
+ sockfd_put(sock);
+ return NULL;
+ }
+
if (sock->ops->shutdown == sock_no_shutdown) {
dev_err(disk_to_dev(nbd->disk), "Unsupported socket: shutdown callout must be supported.\n");
*err = -EINVAL;
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 91642c9a3b29..f982027e8c85 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -223,7 +223,7 @@ MODULE_PARM_DESC(discard, "Support discard operations (requires memory-backed nu
static unsigned long g_cache_size;
module_param_named(cache_size, g_cache_size, ulong, 0444);
-MODULE_PARM_DESC(mbps, "Cache size in MiB for memory-backed device. Default: 0 (none)");
+MODULE_PARM_DESC(cache_size, "Cache size in MiB for memory-backed device. Default: 0 (none)");
static bool g_fua = true;
module_param_named(fua, g_fua, bool, 0444);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index faafd7ff43d6..af0e21149dbc 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -7389,7 +7389,7 @@ static int __init rbd_init(void)
* The number of active work items is limited by the number of
* rbd devices * queue depth, so leave @max_active at default.
*/
- rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM, 0);
+ rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM | WQ_PERCPU, 0);
if (!rbd_wq) {
rc = -ENOMEM;
goto err_out_slab;
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 15627417f12e..f1409e54010a 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -942,11 +942,11 @@ static void rnbd_client_release(struct gendisk *gen)
rnbd_clt_put_dev(dev);
}
-static int rnbd_client_getgeo(struct block_device *block_device,
+static int rnbd_client_getgeo(struct gendisk *disk,
struct hd_geometry *geo)
{
u64 size;
- struct rnbd_clt_dev *dev = block_device->bd_disk->private_data;
+ struct rnbd_clt_dev *dev = disk->private_data;
struct queue_limits *limit = &dev->queue->limits;
size = dev->size * (limit->logical_block_size / SECTOR_SIZE);
@@ -1809,7 +1809,7 @@ static int __init rnbd_client_init(void)
unregister_blkdev(rnbd_client_major, "rnbd");
return err;
}
- rnbd_clt_wq = alloc_workqueue("rnbd_clt_wq", 0, 0);
+ rnbd_clt_wq = alloc_workqueue("rnbd_clt_wq", WQ_PERCPU, 0);
if (!rnbd_clt_wq) {
pr_err("Failed to load module, alloc_workqueue failed.\n");
rnbd_clt_destroy_sysfs_files();
diff --git a/drivers/block/rnull.rs b/drivers/block/rnull.rs
deleted file mode 100644
index 6366da12c5a5..000000000000
--- a/drivers/block/rnull.rs
+++ /dev/null
@@ -1,80 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-//! This is a Rust implementation of the C null block driver.
-//!
-//! Supported features:
-//!
-//! - blk-mq interface
-//! - direct completion
-//! - block size 4k
-//!
-//! The driver is not configurable.
-
-use kernel::{
- alloc::flags,
- block::mq::{
- self,
- gen_disk::{self, GenDisk},
- Operations, TagSet,
- },
- error::Result,
- new_mutex, pr_info,
- prelude::*,
- sync::{Arc, Mutex},
- types::ARef,
-};
-
-module! {
- type: NullBlkModule,
- name: "rnull_mod",
- authors: ["Andreas Hindborg"],
- description: "Rust implementation of the C null block driver",
- license: "GPL v2",
-}
-
-#[pin_data]
-struct NullBlkModule {
- #[pin]
- _disk: Mutex<GenDisk<NullBlkDevice>>,
-}
-
-impl kernel::InPlaceModule for NullBlkModule {
- fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
- pr_info!("Rust null_blk loaded\n");
-
- // Use a immediately-called closure as a stable `try` block
- let disk = /* try */ (|| {
- let tagset = Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?;
-
- gen_disk::GenDiskBuilder::new()
- .capacity_sectors(4096 << 11)
- .logical_block_size(4096)?
- .physical_block_size(4096)?
- .rotational(false)
- .build(fmt!("rnullb{}", 0), tagset)
- })();
-
- try_pin_init!(Self {
- _disk <- new_mutex!(disk?, "nullb:disk"),
- })
- }
-}
-
-struct NullBlkDevice;
-
-#[vtable]
-impl Operations for NullBlkDevice {
- #[inline(always)]
- fn queue_rq(rq: ARef<mq::Request<Self>>, _is_last: bool) -> Result {
- mq::Request::end_ok(rq)
- .map_err(|_e| kernel::error::code::EIO)
- // We take no refcounts on the request, so we expect to be able to
- // end the request. The request reference must be unique at this
- // point, and so `end_ok` cannot fail.
- .expect("Fatal error - expected to be able to end request");
-
- Ok(())
- }
-
- fn commit_rqs() {}
-}
diff --git a/drivers/block/rnull/Kconfig b/drivers/block/rnull/Kconfig
new file mode 100644
index 000000000000..7bc5b376c128
--- /dev/null
+++ b/drivers/block/rnull/Kconfig
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Rust null block device driver configuration
+
+config BLK_DEV_RUST_NULL
+ tristate "Rust null block driver (Experimental)"
+ depends on RUST && CONFIGFS_FS
+ help
+ This is the Rust implementation of the null block driver. Like
+ the C version, the driver allows the user to create virutal block
+ devices that can be configured via various configuration options.
+
+ If unsure, say N.
diff --git a/drivers/block/rnull/Makefile b/drivers/block/rnull/Makefile
new file mode 100644
index 000000000000..11cfa5e615dc
--- /dev/null
+++ b/drivers/block/rnull/Makefile
@@ -0,0 +1,3 @@
+
+obj-$(CONFIG_BLK_DEV_RUST_NULL) += rnull_mod.o
+rnull_mod-y := rnull.o
diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
new file mode 100644
index 000000000000..8498e9bae6fd
--- /dev/null
+++ b/drivers/block/rnull/configfs.rs
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use super::{NullBlkDevice, THIS_MODULE};
+use core::fmt::{Display, Write};
+use kernel::{
+ block::mq::gen_disk::{GenDisk, GenDiskBuilder},
+ c_str,
+ configfs::{self, AttributeOperations},
+ configfs_attrs, new_mutex,
+ page::PAGE_SIZE,
+ prelude::*,
+ str::{kstrtobool_bytes, CString},
+ sync::Mutex,
+};
+use pin_init::PinInit;
+
+pub(crate) fn subsystem() -> impl PinInit<kernel::configfs::Subsystem<Config>, Error> {
+ let item_type = configfs_attrs! {
+ container: configfs::Subsystem<Config>,
+ data: Config,
+ child: DeviceConfig,
+ attributes: [
+ features: 0,
+ ],
+ };
+
+ kernel::configfs::Subsystem::new(c_str!("rnull"), item_type, try_pin_init!(Config {}))
+}
+
+#[pin_data]
+pub(crate) struct Config {}
+
+#[vtable]
+impl AttributeOperations<0> for Config {
+ type Data = Config;
+
+ fn show(_this: &Config, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+ let mut writer = kernel::str::Formatter::new(page);
+ writer.write_str("blocksize,size,rotational,irqmode\n")?;
+ Ok(writer.bytes_written())
+ }
+}
+
+#[vtable]
+impl configfs::GroupOperations for Config {
+ type Child = DeviceConfig;
+
+ fn make_group(
+ &self,
+ name: &CStr,
+ ) -> Result<impl PinInit<configfs::Group<DeviceConfig>, Error>> {
+ let item_type = configfs_attrs! {
+ container: configfs::Group<DeviceConfig>,
+ data: DeviceConfig,
+ attributes: [
+ // Named for compatibility with C null_blk
+ power: 0,
+ blocksize: 1,
+ rotational: 2,
+ size: 3,
+ irqmode: 4,
+ ],
+ };
+
+ Ok(configfs::Group::new(
+ name.try_into()?,
+ item_type,
+ // TODO: cannot coerce new_mutex!() to impl PinInit<_, Error>, so put mutex inside
+ try_pin_init!( DeviceConfig {
+ data <- new_mutex!(DeviceConfigInner {
+ powered: false,
+ block_size: 4096,
+ rotational: false,
+ disk: None,
+ capacity_mib: 4096,
+ irq_mode: IRQMode::None,
+ name: name.try_into()?,
+ }),
+ }),
+ ))
+ }
+}
+
+#[derive(Debug, Clone, Copy)]
+pub(crate) enum IRQMode {
+ None,
+ Soft,
+}
+
+impl TryFrom<u8> for IRQMode {
+ type Error = kernel::error::Error;
+
+ fn try_from(value: u8) -> Result<Self> {
+ match value {
+ 0 => Ok(Self::None),
+ 1 => Ok(Self::Soft),
+ _ => Err(EINVAL),
+ }
+ }
+}
+
+impl Display for IRQMode {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ match self {
+ Self::None => f.write_str("0")?,
+ Self::Soft => f.write_str("1")?,
+ }
+ Ok(())
+ }
+}
+
+#[pin_data]
+pub(crate) struct DeviceConfig {
+ #[pin]
+ data: Mutex<DeviceConfigInner>,
+}
+
+#[pin_data]
+struct DeviceConfigInner {
+ powered: bool,
+ name: CString,
+ block_size: u32,
+ rotational: bool,
+ capacity_mib: u64,
+ irq_mode: IRQMode,
+ disk: Option<GenDisk<NullBlkDevice>>,
+}
+
+#[vtable]
+impl configfs::AttributeOperations<0> for DeviceConfig {
+ type Data = DeviceConfig;
+
+ fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+ let mut writer = kernel::str::Formatter::new(page);
+
+ if this.data.lock().powered {
+ writer.write_str("1\n")?;
+ } else {
+ writer.write_str("0\n")?;
+ }
+
+ Ok(writer.bytes_written())
+ }
+
+ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
+ let power_op = kstrtobool_bytes(page)?;
+ let mut guard = this.data.lock();
+
+ if !guard.powered && power_op {
+ guard.disk = Some(NullBlkDevice::new(
+ &guard.name,
+ guard.block_size,
+ guard.rotational,
+ guard.capacity_mib,
+ guard.irq_mode,
+ )?);
+ guard.powered = true;
+ } else if guard.powered && !power_op {
+ drop(guard.disk.take());
+ guard.powered = false;
+ }
+
+ Ok(())
+ }
+}
+
+#[vtable]
+impl configfs::AttributeOperations<1> for DeviceConfig {
+ type Data = DeviceConfig;
+
+ fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+ let mut writer = kernel::str::Formatter::new(page);
+ writer.write_fmt(fmt!("{}\n", this.data.lock().block_size))?;
+ Ok(writer.bytes_written())
+ }
+
+ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
+ if this.data.lock().powered {
+ return Err(EBUSY);
+ }
+
+ let text = core::str::from_utf8(page)?.trim();
+ let value = text.parse::<u32>().map_err(|_| EINVAL)?;
+
+ GenDiskBuilder::validate_block_size(value)?;
+ this.data.lock().block_size = value;
+ Ok(())
+ }
+}
+
+#[vtable]
+impl configfs::AttributeOperations<2> for DeviceConfig {
+ type Data = DeviceConfig;
+
+ fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+ let mut writer = kernel::str::Formatter::new(page);
+
+ if this.data.lock().rotational {
+ writer.write_str("1\n")?;
+ } else {
+ writer.write_str("0\n")?;
+ }
+
+ Ok(writer.bytes_written())
+ }
+
+ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
+ if this.data.lock().powered {
+ return Err(EBUSY);
+ }
+
+ this.data.lock().rotational = kstrtobool_bytes(page)?;
+
+ Ok(())
+ }
+}
+
+#[vtable]
+impl configfs::AttributeOperations<3> for DeviceConfig {
+ type Data = DeviceConfig;
+
+ fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+ let mut writer = kernel::str::Formatter::new(page);
+ writer.write_fmt(fmt!("{}\n", this.data.lock().capacity_mib))?;
+ Ok(writer.bytes_written())
+ }
+
+ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
+ if this.data.lock().powered {
+ return Err(EBUSY);
+ }
+
+ let text = core::str::from_utf8(page)?.trim();
+ let value = text.parse::<u64>().map_err(|_| EINVAL)?;
+
+ this.data.lock().capacity_mib = value;
+ Ok(())
+ }
+}
+
+#[vtable]
+impl configfs::AttributeOperations<4> for DeviceConfig {
+ type Data = DeviceConfig;
+
+ fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> {
+ let mut writer = kernel::str::Formatter::new(page);
+ writer.write_fmt(fmt!("{}\n", this.data.lock().irq_mode))?;
+ Ok(writer.bytes_written())
+ }
+
+ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
+ if this.data.lock().powered {
+ return Err(EBUSY);
+ }
+
+ let text = core::str::from_utf8(page)?.trim();
+ let value = text.parse::<u8>().map_err(|_| EINVAL)?;
+
+ this.data.lock().irq_mode = IRQMode::try_from(value)?;
+ Ok(())
+ }
+}
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
new file mode 100644
index 000000000000..1ec694d7f1a6
--- /dev/null
+++ b/drivers/block/rnull/rnull.rs
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! This is a Rust implementation of the C null block driver.
+
+mod configfs;
+
+use configfs::IRQMode;
+use kernel::{
+ block::{
+ self,
+ mq::{
+ self,
+ gen_disk::{self, GenDisk},
+ Operations, TagSet,
+ },
+ },
+ error::Result,
+ pr_info,
+ prelude::*,
+ sync::Arc,
+ types::ARef,
+};
+use pin_init::PinInit;
+
+module! {
+ type: NullBlkModule,
+ name: "rnull_mod",
+ authors: ["Andreas Hindborg"],
+ description: "Rust implementation of the C null block driver",
+ license: "GPL v2",
+}
+
+#[pin_data]
+struct NullBlkModule {
+ #[pin]
+ configfs_subsystem: kernel::configfs::Subsystem<configfs::Config>,
+}
+
+impl kernel::InPlaceModule for NullBlkModule {
+ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
+ pr_info!("Rust null_blk loaded\n");
+
+ try_pin_init!(Self {
+ configfs_subsystem <- configfs::subsystem(),
+ })
+ }
+}
+
+struct NullBlkDevice;
+
+impl NullBlkDevice {
+ fn new(
+ name: &CStr,
+ block_size: u32,
+ rotational: bool,
+ capacity_mib: u64,
+ irq_mode: IRQMode,
+ ) -> Result<GenDisk<Self>> {
+ let tagset = Arc::pin_init(TagSet::new(1, 256, 1), GFP_KERNEL)?;
+
+ let queue_data = Box::new(QueueData { irq_mode }, GFP_KERNEL)?;
+
+ gen_disk::GenDiskBuilder::new()
+ .capacity_sectors(capacity_mib << (20 - block::SECTOR_SHIFT))
+ .logical_block_size(block_size)?
+ .physical_block_size(block_size)?
+ .rotational(rotational)
+ .build(fmt!("{}", name.to_str()?), tagset, queue_data)
+ }
+}
+
+struct QueueData {
+ irq_mode: IRQMode,
+}
+
+#[vtable]
+impl Operations for NullBlkDevice {
+ type QueueData = KBox<QueueData>;
+
+ #[inline(always)]
+ fn queue_rq(queue_data: &QueueData, rq: ARef<mq::Request<Self>>, _is_last: bool) -> Result {
+ match queue_data.irq_mode {
+ IRQMode::None => mq::Request::end_ok(rq)
+ .map_err(|_e| kernel::error::code::EIO)
+ // We take no refcounts on the request, so we expect to be able to
+ // end the request. The request reference must be unique at this
+ // point, and so `end_ok` cannot fail.
+ .expect("Fatal error - expected to be able to end request"),
+ IRQMode::Soft => mq::Request::complete(rq),
+ }
+ Ok(())
+ }
+
+ fn commit_rqs(_queue_data: &QueueData) {}
+
+ fn complete(rq: ARef<mq::Request<Self>>) {
+ mq::Request::end_ok(rq)
+ .map_err(|_e| kernel::error::code::EIO)
+ // We take no refcounts on the request, so we expect to be able to
+ // end the request. The request reference must be unique at this
+ // point, and so `end_ok` cannot fail.
+ .expect("Fatal error - expected to be able to end request");
+ }
+}
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 7af21fe67671..db1fe9772a4d 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -119,9 +119,8 @@ static inline u32 vdc_tx_dring_avail(struct vio_dring_state *dr)
return vio_dring_avail(dr, VDC_TX_RING_SIZE);
}
-static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+static int vdc_getgeo(struct gendisk *disk, struct hd_geometry *geo)
{
- struct gendisk *disk = bdev->bd_disk;
sector_t nsect = get_capacity(disk);
sector_t cylinders = nsect;
@@ -1189,7 +1188,7 @@ static void vdc_ldc_reset(struct vdc_port *port)
}
if (port->ldc_timeout)
- mod_delayed_work(system_wq, &port->ldc_reset_timer_work,
+ mod_delayed_work(system_percpu_wq, &port->ldc_reset_timer_work,
round_jiffies(jiffies + HZ * port->ldc_timeout));
mod_timer(&port->vio.timer, round_jiffies(jiffies + HZ));
return;
@@ -1217,7 +1216,7 @@ static int __init vdc_init(void)
{
int err;
- sunvdc_wq = alloc_workqueue("sunvdc", 0, 0);
+ sunvdc_wq = alloc_workqueue("sunvdc", WQ_PERCPU, 0);
if (!sunvdc_wq)
return -ENOMEM;
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index eda33c5eb5e2..416015947ae6 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -711,9 +711,9 @@ static int floppy_ioctl(struct block_device *bdev, blk_mode_t mode,
return -ENOTTY;
}
-static int floppy_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+static int floppy_getgeo(struct gendisk *disk, struct hd_geometry *geo)
{
- struct floppy_state *fs = bdev->bd_disk->private_data;
+ struct floppy_state *fs = disk->private_data;
struct floppy_struct *g;
int ret;
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 8fdc26a61104..0c74a41a6753 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -201,7 +201,6 @@ struct ublk_queue {
bool force_abort;
bool canceling;
bool fail_io; /* copy of dev->state == UBLK_S_DEV_FAIL_IO */
- unsigned short nr_io_ready; /* how many ios setup */
spinlock_t cancel_lock;
struct ublk_device *dev;
struct ublk_io ios[];
@@ -234,7 +233,7 @@ struct ublk_device {
struct ublk_params params;
struct completion completion;
- unsigned int nr_queues_ready;
+ u32 nr_io_ready;
bool unprivileged_daemons;
struct mutex cancel_mutex;
bool canceling;
@@ -252,8 +251,7 @@ static void ublk_io_release(void *priv);
static void ublk_stop_dev_unlocked(struct ublk_device *ub);
static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq);
static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
- const struct ublk_queue *ubq, struct ublk_io *io,
- size_t offset);
+ u16 q_id, u16 tag, struct ublk_io *io, size_t offset);
static inline unsigned int ublk_req_build_flags(struct request *req);
static inline struct ublksrv_io_desc *
@@ -532,7 +530,8 @@ static blk_status_t ublk_setup_iod_zoned(struct ublk_queue *ubq,
#endif
-static inline void __ublk_complete_rq(struct request *req);
+static inline void __ublk_complete_rq(struct request *req, struct ublk_io *io,
+ bool need_map);
static dev_t ublk_chr_devt;
static const struct class ublk_chr_class = {
@@ -664,22 +663,44 @@ static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq)
return ubq->flags & UBLK_F_SUPPORT_ZERO_COPY;
}
+static inline bool ublk_dev_support_zero_copy(const struct ublk_device *ub)
+{
+ return ub->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY;
+}
+
static inline bool ublk_support_auto_buf_reg(const struct ublk_queue *ubq)
{
return ubq->flags & UBLK_F_AUTO_BUF_REG;
}
+static inline bool ublk_dev_support_auto_buf_reg(const struct ublk_device *ub)
+{
+ return ub->dev_info.flags & UBLK_F_AUTO_BUF_REG;
+}
+
static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
{
return ubq->flags & UBLK_F_USER_COPY;
}
+static inline bool ublk_dev_support_user_copy(const struct ublk_device *ub)
+{
+ return ub->dev_info.flags & UBLK_F_USER_COPY;
+}
+
static inline bool ublk_need_map_io(const struct ublk_queue *ubq)
{
return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq) &&
!ublk_support_auto_buf_reg(ubq);
}
+static inline bool ublk_dev_need_map_io(const struct ublk_device *ub)
+{
+ return !ublk_dev_support_user_copy(ub) &&
+ !ublk_dev_support_zero_copy(ub) &&
+ !ublk_dev_support_auto_buf_reg(ub);
+}
+
static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
{
/*
@@ -697,6 +718,13 @@ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
ublk_support_auto_buf_reg(ubq);
}
+static inline bool ublk_dev_need_req_ref(const struct ublk_device *ub)
+{
+ return ublk_dev_support_user_copy(ub) ||
+ ublk_dev_support_zero_copy(ub) ||
+ ublk_dev_support_auto_buf_reg(ub);
+}
+
static inline void ublk_init_req_ref(const struct ublk_queue *ubq,
struct ublk_io *io)
{
@@ -711,8 +739,11 @@ static inline bool ublk_get_req_ref(struct ublk_io *io)
static inline void ublk_put_req_ref(struct ublk_io *io, struct request *req)
{
- if (refcount_dec_and_test(&io->ref))
- __ublk_complete_rq(req);
+ if (!refcount_dec_and_test(&io->ref))
+ return;
+
+ /* ublk_need_map_io() and ublk_need_req_ref() are mutually exclusive */
+ __ublk_complete_rq(req, io, false);
}
static inline bool ublk_sub_req_ref(struct ublk_io *io)
@@ -728,6 +759,11 @@ static inline bool ublk_need_get_data(const struct ublk_queue *ubq)
return ubq->flags & UBLK_F_NEED_GET_DATA;
}
+static inline bool ublk_dev_need_get_data(const struct ublk_device *ub)
+{
+ return ub->dev_info.flags & UBLK_F_NEED_GET_DATA;
+}
+
/* Called in slow path only, keep it noinline for trace purpose */
static noinline struct ublk_device *ublk_get_device(struct ublk_device *ub)
{
@@ -764,11 +800,9 @@ static inline int __ublk_queue_cmd_buf_size(int depth)
return round_up(depth * sizeof(struct ublksrv_io_desc), PAGE_SIZE);
}
-static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id)
+static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub)
{
- struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
-
- return __ublk_queue_cmd_buf_size(ubq->q_depth);
+ return __ublk_queue_cmd_buf_size(ub->dev_info.queue_depth);
}
static int ublk_max_cmd_buf_size(void)
@@ -1019,13 +1053,13 @@ static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req,
return rq_bytes;
}
-static int ublk_unmap_io(const struct ublk_queue *ubq,
+static int ublk_unmap_io(bool need_map,
const struct request *req,
const struct ublk_io *io)
{
const unsigned int rq_bytes = blk_rq_bytes(req);
- if (!ublk_need_map_io(ubq))
+ if (!need_map)
return rq_bytes;
if (ublk_need_unmap_req(req)) {
@@ -1072,13 +1106,8 @@ static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req)
{
struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag);
struct ublk_io *io = &ubq->ios[req->tag];
- enum req_op op = req_op(req);
u32 ublk_op;
- if (!ublk_queue_is_zoned(ubq) &&
- (op_is_zone_mgmt(op) || op == REQ_OP_ZONE_APPEND))
- return BLK_STS_IOERR;
-
switch (req_op(req)) {
case REQ_OP_READ:
ublk_op = UBLK_IO_OP_READ;
@@ -1117,10 +1146,9 @@ static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu(
}
/* todo: handle partial completion */
-static inline void __ublk_complete_rq(struct request *req)
+static inline void __ublk_complete_rq(struct request *req, struct ublk_io *io,
+ bool need_map)
{
- struct ublk_queue *ubq = req->mq_hctx->driver_data;
- struct ublk_io *io = &ubq->ios[req->tag];
unsigned int unmapped_bytes;
blk_status_t res = BLK_STS_OK;
@@ -1144,7 +1172,7 @@ static inline void __ublk_complete_rq(struct request *req)
goto exit;
/* for READ request, writing data in iod->addr to rq buffers */
- unmapped_bytes = ublk_unmap_io(ubq, req, io);
+ unmapped_bytes = ublk_unmap_io(need_map, req, io);
/*
* Extremely impossible since we got data filled in just before
@@ -1500,9 +1528,6 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
{
int i;
- /* All old ioucmds have to be completed */
- ubq->nr_io_ready = 0;
-
for (i = 0; i < ubq->q_depth; i++) {
struct ublk_io *io = &ubq->ios[i];
@@ -1551,7 +1576,7 @@ static void ublk_reset_ch_dev(struct ublk_device *ub)
/* set to NULL, otherwise new tasks cannot mmap io_cmd_buf */
ub->mm = NULL;
- ub->nr_queues_ready = 0;
+ ub->nr_io_ready = 0;
ub->unprivileged_daemons = false;
ub->ublksrv_tgid = -1;
}
@@ -1775,23 +1800,23 @@ static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma)
__func__, q_id, current->pid, vma->vm_start,
phys_off, (unsigned long)sz);
- if (sz != ublk_queue_cmd_buf_size(ub, q_id))
+ if (sz != ublk_queue_cmd_buf_size(ub))
return -EINVAL;
pfn = virt_to_phys(ublk_queue_cmd_buf(ub, q_id)) >> PAGE_SHIFT;
return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
}
-static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
+static void __ublk_fail_req(struct ublk_device *ub, struct ublk_io *io,
struct request *req)
{
WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE);
- if (ublk_nosrv_should_reissue_outstanding(ubq->dev))
+ if (ublk_nosrv_should_reissue_outstanding(ub))
blk_mq_requeue_request(req, false);
else {
io->res = -EIO;
- __ublk_complete_rq(req);
+ __ublk_complete_rq(req, io, ublk_dev_need_map_io(ub));
}
}
@@ -1811,7 +1836,7 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
struct ublk_io *io = &ubq->ios[i];
if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)
- __ublk_fail_req(ubq, io, io->req);
+ __ublk_fail_req(ub, io, io->req);
}
}
@@ -1916,9 +1941,11 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
ublk_cancel_cmd(ubq, pdu->tag, issue_flags);
}
-static inline bool ublk_queue_ready(struct ublk_queue *ubq)
+static inline bool ublk_dev_ready(const struct ublk_device *ub)
{
- return ubq->nr_io_ready == ubq->q_depth;
+ u32 total = (u32)ub->dev_info.nr_hw_queues * ub->dev_info.queue_depth;
+
+ return ub->nr_io_ready == total;
}
static void ublk_cancel_queue(struct ublk_queue *ubq)
@@ -2042,16 +2069,14 @@ static void ublk_reset_io_flags(struct ublk_device *ub)
}
/* device can only be started after all IOs are ready */
-static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
+static void ublk_mark_io_ready(struct ublk_device *ub)
__must_hold(&ub->mutex)
{
- ubq->nr_io_ready++;
- if (ublk_queue_ready(ubq))
- ub->nr_queues_ready++;
if (!ub->unprivileged_daemons && !capable(CAP_SYS_ADMIN))
ub->unprivileged_daemons = true;
- if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) {
+ ub->nr_io_ready++;
+ if (ublk_dev_ready(ub)) {
/* now we are ready for handling ublk io request */
ublk_reset_io_flags(ub);
complete_all(&ub->completion);
@@ -2122,11 +2147,11 @@ ublk_fill_io_cmd(struct ublk_io *io, struct io_uring_cmd *cmd)
}
static inline int
-ublk_config_io_buf(const struct ublk_queue *ubq, struct ublk_io *io,
+ublk_config_io_buf(const struct ublk_device *ub, struct ublk_io *io,
struct io_uring_cmd *cmd, unsigned long buf_addr,
u16 *buf_idx)
{
- if (ublk_support_auto_buf_reg(ubq))
+ if (ublk_dev_support_auto_buf_reg(ub))
return ublk_handle_auto_buf_reg(io, cmd, buf_idx);
io->addr = buf_addr;
@@ -2165,18 +2190,18 @@ static void ublk_io_release(void *priv)
}
static int ublk_register_io_buf(struct io_uring_cmd *cmd,
- const struct ublk_queue *ubq,
+ struct ublk_device *ub,
+ u16 q_id, u16 tag,
struct ublk_io *io,
unsigned int index, unsigned int issue_flags)
{
- struct ublk_device *ub = cmd->file->private_data;
struct request *req;
int ret;
- if (!ublk_support_zero_copy(ubq))
+ if (!ublk_dev_support_zero_copy(ub))
return -EINVAL;
- req = __ublk_check_and_get_req(ub, ubq, io, 0);
+ req = __ublk_check_and_get_req(ub, q_id, tag, io, 0);
if (!req)
return -EINVAL;
@@ -2192,7 +2217,8 @@ static int ublk_register_io_buf(struct io_uring_cmd *cmd,
static int
ublk_daemon_register_io_buf(struct io_uring_cmd *cmd,
- const struct ublk_queue *ubq, struct ublk_io *io,
+ struct ublk_device *ub,
+ u16 q_id, u16 tag, struct ublk_io *io,
unsigned index, unsigned issue_flags)
{
unsigned new_registered_buffers;
@@ -2205,9 +2231,10 @@ ublk_daemon_register_io_buf(struct io_uring_cmd *cmd,
*/
new_registered_buffers = io->task_registered_buffers + 1;
if (unlikely(new_registered_buffers >= UBLK_REFCOUNT_INIT))
- return ublk_register_io_buf(cmd, ubq, io, index, issue_flags);
+ return ublk_register_io_buf(cmd, ub, q_id, tag, io, index,
+ issue_flags);
- if (!ublk_support_zero_copy(ubq) || !ublk_rq_has_data(req))
+ if (!ublk_dev_support_zero_copy(ub) || !ublk_rq_has_data(req))
return -EINVAL;
ret = io_buffer_register_bvec(cmd, req, ublk_io_release, index,
@@ -2229,14 +2256,14 @@ static int ublk_unregister_io_buf(struct io_uring_cmd *cmd,
return io_buffer_unregister_bvec(cmd, index, issue_flags);
}
-static int ublk_check_fetch_buf(const struct ublk_queue *ubq, __u64 buf_addr)
+static int ublk_check_fetch_buf(const struct ublk_device *ub, __u64 buf_addr)
{
- if (ublk_need_map_io(ubq)) {
+ if (ublk_dev_need_map_io(ub)) {
/*
* FETCH_RQ has to provide IO buffer if NEED GET
* DATA is not enabled
*/
- if (!buf_addr && !ublk_need_get_data(ubq))
+ if (!buf_addr && !ublk_dev_need_get_data(ub))
return -EINVAL;
} else if (buf_addr) {
/* User copy requires addr to be unset */
@@ -2245,10 +2272,9 @@ static int ublk_check_fetch_buf(const struct ublk_queue *ubq, __u64 buf_addr)
return 0;
}
-static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
+static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_device *ub,
struct ublk_io *io, __u64 buf_addr)
{
- struct ublk_device *ub = ubq->dev;
int ret = 0;
/*
@@ -2257,8 +2283,8 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
* FETCH, so it is fine even for IO_URING_F_NONBLOCK.
*/
mutex_lock(&ub->mutex);
- /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
- if (ublk_queue_ready(ubq)) {
+ /* UBLK_IO_FETCH_REQ is only allowed before dev is setup */
+ if (ublk_dev_ready(ub)) {
ret = -EBUSY;
goto out;
}
@@ -2272,28 +2298,28 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV);
ublk_fill_io_cmd(io, cmd);
- ret = ublk_config_io_buf(ubq, io, cmd, buf_addr, NULL);
+ ret = ublk_config_io_buf(ub, io, cmd, buf_addr, NULL);
if (ret)
goto out;
WRITE_ONCE(io->task, get_task_struct(current));
- ublk_mark_io_ready(ub, ubq);
+ ublk_mark_io_ready(ub);
out:
mutex_unlock(&ub->mutex);
return ret;
}
-static int ublk_check_commit_and_fetch(const struct ublk_queue *ubq,
+static int ublk_check_commit_and_fetch(const struct ublk_device *ub,
struct ublk_io *io, __u64 buf_addr)
{
struct request *req = io->req;
- if (ublk_need_map_io(ubq)) {
+ if (ublk_dev_need_map_io(ub)) {
/*
* COMMIT_AND_FETCH_REQ has to provide IO buffer if
* NEED GET DATA is not enabled or it is Read IO.
*/
- if (!buf_addr && (!ublk_need_get_data(ubq) ||
+ if (!buf_addr && (!ublk_dev_need_get_data(ub) ||
req_op(req) == REQ_OP_READ))
return -EINVAL;
} else if (req_op(req) != REQ_OP_ZONE_APPEND && buf_addr) {
@@ -2307,10 +2333,10 @@ static int ublk_check_commit_and_fetch(const struct ublk_queue *ubq,
return 0;
}
-static bool ublk_need_complete_req(const struct ublk_queue *ubq,
+static bool ublk_need_complete_req(const struct ublk_device *ub,
struct ublk_io *io)
{
- if (ublk_need_req_ref(ubq))
+ if (ublk_dev_need_req_ref(ub))
return ublk_sub_req_ref(io);
return true;
}
@@ -2333,23 +2359,28 @@ static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io,
return ublk_start_io(ubq, req, io);
}
-static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
- unsigned int issue_flags,
- const struct ublksrv_io_cmd *ub_cmd)
+static int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
{
+ /* May point to userspace-mapped memory */
+ const struct ublksrv_io_cmd *ub_src = io_uring_sqe_cmd(cmd->sqe);
u16 buf_idx = UBLK_INVALID_BUF_IDX;
struct ublk_device *ub = cmd->file->private_data;
struct ublk_queue *ubq;
struct ublk_io *io;
u32 cmd_op = cmd->cmd_op;
- unsigned tag = ub_cmd->tag;
+ u16 q_id = READ_ONCE(ub_src->q_id);
+ u16 tag = READ_ONCE(ub_src->tag);
+ s32 result = READ_ONCE(ub_src->result);
+ u64 addr = READ_ONCE(ub_src->addr); /* unioned with zone_append_lba */
struct request *req;
int ret;
bool compl;
+ WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED);
+
pr_devel("%s: received: cmd op %d queue %d tag %d result %d\n",
- __func__, cmd->cmd_op, ub_cmd->q_id, tag,
- ub_cmd->result);
+ __func__, cmd->cmd_op, q_id, tag, result);
ret = ublk_check_cmd_op(cmd_op);
if (ret)
@@ -2360,25 +2391,24 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
* so no need to validate the q_id, tag, or task
*/
if (_IOC_NR(cmd_op) == UBLK_IO_UNREGISTER_IO_BUF)
- return ublk_unregister_io_buf(cmd, ub, ub_cmd->addr,
- issue_flags);
+ return ublk_unregister_io_buf(cmd, ub, addr, issue_flags);
ret = -EINVAL;
- if (ub_cmd->q_id >= ub->dev_info.nr_hw_queues)
+ if (q_id >= ub->dev_info.nr_hw_queues)
goto out;
- ubq = ublk_get_queue(ub, ub_cmd->q_id);
+ ubq = ublk_get_queue(ub, q_id);
- if (tag >= ubq->q_depth)
+ if (tag >= ub->dev_info.queue_depth)
goto out;
io = &ubq->ios[tag];
/* UBLK_IO_FETCH_REQ can be handled on any task, which sets io->task */
if (unlikely(_IOC_NR(cmd_op) == UBLK_IO_FETCH_REQ)) {
- ret = ublk_check_fetch_buf(ubq, ub_cmd->addr);
+ ret = ublk_check_fetch_buf(ub, addr);
if (ret)
goto out;
- ret = ublk_fetch(cmd, ubq, io, ub_cmd->addr);
+ ret = ublk_fetch(cmd, ub, io, addr);
if (ret)
goto out;
@@ -2392,8 +2422,8 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
* so can be handled on any task
*/
if (_IOC_NR(cmd_op) == UBLK_IO_REGISTER_IO_BUF)
- return ublk_register_io_buf(cmd, ubq, io, ub_cmd->addr,
- issue_flags);
+ return ublk_register_io_buf(cmd, ub, q_id, tag, io,
+ addr, issue_flags);
goto out;
}
@@ -2414,24 +2444,24 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
switch (_IOC_NR(cmd_op)) {
case UBLK_IO_REGISTER_IO_BUF:
- return ublk_daemon_register_io_buf(cmd, ubq, io, ub_cmd->addr,
+ return ublk_daemon_register_io_buf(cmd, ub, q_id, tag, io, addr,
issue_flags);
case UBLK_IO_COMMIT_AND_FETCH_REQ:
- ret = ublk_check_commit_and_fetch(ubq, io, ub_cmd->addr);
+ ret = ublk_check_commit_and_fetch(ub, io, addr);
if (ret)
goto out;
- io->res = ub_cmd->result;
+ io->res = result;
req = ublk_fill_io_cmd(io, cmd);
- ret = ublk_config_io_buf(ubq, io, cmd, ub_cmd->addr, &buf_idx);
- compl = ublk_need_complete_req(ubq, io);
+ ret = ublk_config_io_buf(ub, io, cmd, addr, &buf_idx);
+ compl = ublk_need_complete_req(ub, io);
/* can't touch 'ublk_io' any more */
if (buf_idx != UBLK_INVALID_BUF_IDX)
io_buffer_unregister_bvec(cmd, buf_idx, issue_flags);
if (req_op(req) == REQ_OP_ZONE_APPEND)
- req->__sector = ub_cmd->zone_append_lba;
+ req->__sector = addr;
if (compl)
- __ublk_complete_rq(req);
+ __ublk_complete_rq(req, io, ublk_dev_need_map_io(ub));
if (ret)
goto out;
@@ -2443,7 +2473,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
* request
*/
req = ublk_fill_io_cmd(io, cmd);
- ret = ublk_config_io_buf(ubq, io, cmd, ub_cmd->addr, NULL);
+ ret = ublk_config_io_buf(ub, io, cmd, addr, NULL);
WARN_ON_ONCE(ret);
if (likely(ublk_get_data(ubq, io, req))) {
__ublk_prep_compl_io_cmd(io, req);
@@ -2463,16 +2493,15 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
}
static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
- const struct ublk_queue *ubq, struct ublk_io *io, size_t offset)
+ u16 q_id, u16 tag, struct ublk_io *io, size_t offset)
{
- unsigned tag = io - ubq->ios;
struct request *req;
/*
* can't use io->req in case of concurrent UBLK_IO_COMMIT_AND_FETCH_REQ,
* which would overwrite it with io->cmd
*/
- req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
+ req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);
if (!req)
return NULL;
@@ -2494,26 +2523,6 @@ fail_put:
return NULL;
}
-static inline int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd,
- unsigned int issue_flags)
-{
- /*
- * Not necessary for async retry, but let's keep it simple and always
- * copy the values to avoid any potential reuse.
- */
- const struct ublksrv_io_cmd *ub_src = io_uring_sqe_cmd(cmd->sqe);
- const struct ublksrv_io_cmd ub_cmd = {
- .q_id = READ_ONCE(ub_src->q_id),
- .tag = READ_ONCE(ub_src->tag),
- .result = READ_ONCE(ub_src->result),
- .addr = READ_ONCE(ub_src->addr)
- };
-
- WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED);
-
- return __ublk_ch_uring_cmd(cmd, issue_flags, &ub_cmd);
-}
-
static void ublk_ch_uring_cmd_cb(struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
@@ -2583,17 +2592,14 @@ static struct request *ublk_check_and_get_req(struct kiocb *iocb,
return ERR_PTR(-EINVAL);
ubq = ublk_get_queue(ub, q_id);
- if (!ubq)
- return ERR_PTR(-EINVAL);
-
- if (!ublk_support_user_copy(ubq))
+ if (!ublk_dev_support_user_copy(ub))
return ERR_PTR(-EACCES);
- if (tag >= ubq->q_depth)
+ if (tag >= ub->dev_info.queue_depth)
return ERR_PTR(-EINVAL);
*io = &ubq->ios[tag];
- req = __ublk_check_and_get_req(ub, ubq, *io, buf_off);
+ req = __ublk_check_and_get_req(ub, q_id, tag, *io, buf_off);
if (!req)
return ERR_PTR(-EINVAL);
@@ -2656,7 +2662,7 @@ static const struct file_operations ublk_ch_fops = {
static void ublk_deinit_queue(struct ublk_device *ub, int q_id)
{
- int size = ublk_queue_cmd_buf_size(ub, q_id);
+ int size = ublk_queue_cmd_buf_size(ub);
struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
int i;
@@ -2683,7 +2689,7 @@ static int ublk_init_queue(struct ublk_device *ub, int q_id)
ubq->flags = ub->dev_info.flags;
ubq->q_id = q_id;
ubq->q_depth = ub->dev_info.queue_depth;
- size = ublk_queue_cmd_buf_size(ub, q_id);
+ size = ublk_queue_cmd_buf_size(ub);
ptr = (void *) __get_free_pages(gfp_flags, get_order(size));
if (!ptr)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index e649fa67bac1..f061420dfb10 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -829,9 +829,9 @@ out:
}
/* We provide getgeo only to please some old bootloader/partitioning tools */
-static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
+static int virtblk_getgeo(struct gendisk *disk, struct hd_geometry *geo)
{
- struct virtio_blk *vblk = bd->bd_disk->private_data;
+ struct virtio_blk *vblk = disk->private_data;
int ret = 0;
mutex_lock(&vblk->vdev_mutex);
@@ -853,7 +853,7 @@ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
/* some standard values, similar to sd */
geo->heads = 1 << 6;
geo->sectors = 1 << 5;
- geo->cylinders = get_capacity(bd->bd_disk) >> 11;
+ geo->cylinders = get_capacity(disk) >> 11;
}
out:
mutex_unlock(&vblk->vdev_mutex);
@@ -1682,7 +1682,7 @@ static int __init virtio_blk_init(void)
{
int error;
- virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
+ virtblk_wq = alloc_workqueue("virtio-blk", WQ_PERCPU, 0);
if (!virtblk_wq)
return -ENOMEM;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 5babe575c288..04fc6b552c04 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -493,11 +493,11 @@ static void blkif_restart_queue_callback(void *arg)
schedule_work(&rinfo->work);
}
-static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
+static int blkif_getgeo(struct gendisk *disk, struct hd_geometry *hg)
{
/* We don't have real geometry info, but let's at least return
values consistent with the size of the device */
- sector_t nsect = get_capacity(bd->bd_disk);
+ sector_t nsect = get_capacity(disk);
sector_t cylinders = nsect;
hg->heads = 0xff;
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index f31652085adc..42809cf6dd82 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1085,7 +1085,7 @@ static int read_from_bdev_sync(struct zram *zram, struct page *page,
work.entry = entry;
INIT_WORK_ONSTACK(&work.work, zram_sync_read);
- queue_work(system_unbound_wq, &work.work);
+ queue_work(system_dfl_wq, &work.work);
flush_work(&work.work);
destroy_work_on_stack(&work.work);