From ff06db1efb2ad6db06eb5b99b88a0c15a9cc9b0e Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 16 Jun 2016 09:53:58 +0200 Subject: floppy: fix open(O_ACCMODE) for ioctl-only open Commit 09954bad4 ("floppy: refactor open() flags handling"), as a side-effect, causes open(/dev/fdX, O_ACCMODE) to fail. It turns out that this is being used by setfdprm in userspace for ioctl-only open(). Reintroduce the original behavior wrt !(FMODE_READ|FMODE_WRITE) modes, while still keeping the original O_NDELAY bug fixed. Cc: stable@vger.kernel.org # v4.5+ Reported-by: Wim Osterholt Tested-by: Wim Osterholt Signed-off-by: Jiri Kosina Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index c557057fe8ae..b71a9c767009 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3663,11 +3663,6 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) opened_bdev[drive] = bdev; - if (!(mode & (FMODE_READ|FMODE_WRITE))) { - res = -EINVAL; - goto out; - } - res = -ENXIO; if (!floppy_track_buffer) { @@ -3711,13 +3706,15 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) if (UFDCS->rawcmd == 1) UFDCS->rawcmd = 2; - UDRS->last_checked = 0; - clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); - check_disk_change(bdev); - if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) - goto out; - if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) - goto out; + if (mode & (FMODE_READ|FMODE_WRITE)) { + UDRS->last_checked = 0; + clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); + check_disk_change(bdev); + if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) + goto out; + if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) + goto out; + } res = -EROFS; -- cgit v1.2.3 From 97240963eb308d8d21a89c0459822f7ea98463b4 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 27 May 2016 12:59:35 +0200 Subject: nbd: fix race 
in ioctl Quentin ran into this bug: WARNING: CPU: 64 PID: 10085 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x65/0x80 sysfs: cannot create duplicate filename '/devices/virtual/block/nbd3/pid' Modules linked in: nbd CPU: 64 PID: 10085 Comm: qemu-nbd Tainted: G D 4.6.0+ #7 0000000000000000 ffff8820330bba68 ffffffff814b8791 ffff8820330bbac8 0000000000000000 ffff8820330bbab8 ffffffff810d04ab ffff8820330bbaa8 0000001f00000296 0000000000017681 ffff8810380bf000 ffffffffa0001790 Call Trace: [] dump_stack+0x4d/0x6c [] __warn+0xdb/0x100 [] warn_slowpath_fmt+0x44/0x50 [] sysfs_warn_dup+0x65/0x80 [] sysfs_add_file_mode_ns+0x172/0x180 [] sysfs_create_file_ns+0x25/0x30 [] device_create_file+0x36/0x90 [] __nbd_ioctl+0x32d/0x9b0 [nbd] [] ? find_next_bit+0x18/0x20 [] ? select_idle_sibling+0xe9/0x120 [] ? __enqueue_entity+0x67/0x70 [] ? enqueue_task_fair+0x630/0xe20 [] ? resched_curr+0x36/0x70 [] ? check_preempt_curr+0x78/0x90 [] ? ttwu_do_wakeup+0x12/0x80 [] ? ttwu_do_activate.constprop.86+0x61/0x70 [] ? try_to_wake_up+0x185/0x2d0 [] ? default_wake_function+0xd/0x10 [] ? autoremove_wake_function+0x11/0x40 [] nbd_ioctl+0x67/0x94 [nbd] [] blkdev_ioctl+0x14d/0x940 [] ? put_pipe_info+0x22/0x60 [] block_ioctl+0x3c/0x40 [] do_vfs_ioctl+0x8d/0x5e0 [] ? ____fput+0x9/0x10 [] ? task_work_run+0x72/0x90 [] SyS_ioctl+0x47/0x80 [] entry_SYSCALL_64_fastpath+0x17/0x93 ---[ end trace 7899b295e4f850c8 ]--- It seems fairly obvious that device_create_file() is not being protected from being run concurrently on the same nbd. Quentin found the following relevant commits: 1a2ad21 nbd: add locking to nbd_ioctl 90b8f28 [PATCH] end of methods switch: remove the old ones d4430d6 [PATCH] beginning of methods conversion 08f8585 [PATCH] move block_device_operations to blkdev.h It would seem that the race was introduced in the process of moving nbd from BKL to unlocked ioctls. 
By setting nbd->task_recv while the mutex is held, we can prevent other processes from running concurrently (since nbd->task_recv is also checked while the mutex is held). Reported-and-tested-by: Quentin Casasnovas Cc: Markus Pargmann Cc: Paul Clements Cc: Pavel Machek Cc: Jens Axboe Cc: Al Viro Signed-off-by: Vegard Nossum Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 6f55b262b5ce..a9e398019f38 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -451,14 +451,9 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev) sk_set_memalloc(nbd->sock->sk); - nbd->task_recv = current; - ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr); if (ret) { dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); - - nbd->task_recv = NULL; - return ret; } @@ -477,9 +472,6 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev) nbd_size_clear(nbd, bdev); device_remove_file(disk_to_dev(nbd->disk), &pid_attr); - - nbd->task_recv = NULL; - return ret; } @@ -788,6 +780,8 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, if (!nbd->sock) return -EINVAL; + /* We have to claim the device under the lock */ + nbd->task_recv = current; mutex_unlock(&nbd->tx_lock); nbd_parse_flags(nbd, bdev); @@ -796,6 +790,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, nbd_name(nbd)); if (IS_ERR(thread)) { mutex_lock(&nbd->tx_lock); + nbd->task_recv = NULL; return PTR_ERR(thread); } @@ -805,6 +800,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, kthread_stop(thread); mutex_lock(&nbd->tx_lock); + nbd->task_recv = NULL; sock_shutdown(nbd); nbd_clear_que(nbd); -- cgit v1.2.3 From f0225cacfe7e69ff3234a125aeb0f3d65077835c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 4 Aug 2016 16:10:00 
+0200 Subject: loop: don't try to use AIO for discards Fix a fat-fingered conversion to the req_op accessors, and also use a switch statement to make it more obvious what is being checked. Signed-off-by: Christoph Hellwig Reported-by: Dave Chinner Fixes: c2df40 ("drivers: use req op accessor"); Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/loop.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 075377eee0c0..91c2c881cb49 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1659,11 +1659,15 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, if (lo->lo_state != Lo_bound) return -EIO; - if (lo->use_dio && (req_op(cmd->rq) != REQ_OP_FLUSH || - req_op(cmd->rq) == REQ_OP_DISCARD)) - cmd->use_aio = true; - else + switch (req_op(cmd->rq)) { + case REQ_OP_FLUSH: + case REQ_OP_DISCARD: cmd->use_aio = false; + break; + default: + cmd->use_aio = lo->use_dio; + break; + } queue_kthread_work(&lo->worker, &cmd->work); -- cgit v1.2.3 From c1c87c2ba9ec06d8ba9e8a26c18c67a2ba9cd9c1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 4 Aug 2016 16:10:01 +0200 Subject: loop: make do_req_filebacked more robust Use a switch statement to iterate over the possible operations and error out if it's an incorrect one. 
Signed-off-by: Jens Axboe --- drivers/block/loop.c | 55 +++++++++++++++++++++------------------------------- 1 file changed, 22 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 91c2c881cb49..c9f2107f7095 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -510,14 +510,10 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, return 0; } - -static inline int lo_rw_simple(struct loop_device *lo, - struct request *rq, loff_t pos, bool rw) +static int do_req_filebacked(struct loop_device *lo, struct request *rq) { struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); - - if (cmd->use_aio) - return lo_rw_aio(lo, cmd, pos, rw); + loff_t pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset; /* * lo_write_simple and lo_read_simple should have been covered @@ -528,37 +524,30 @@ static inline int lo_rw_simple(struct loop_device *lo, * of the req at one time. And direct read IO doesn't need to * run flush_dcache_page(). 
*/ - if (rw == WRITE) - return lo_write_simple(lo, rq, pos); - else - return lo_read_simple(lo, rq, pos); -} - -static int do_req_filebacked(struct loop_device *lo, struct request *rq) -{ - loff_t pos; - int ret; - - pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset; - - if (op_is_write(req_op(rq))) { - if (req_op(rq) == REQ_OP_FLUSH) - ret = lo_req_flush(lo, rq); - else if (req_op(rq) == REQ_OP_DISCARD) - ret = lo_discard(lo, rq, pos); - else if (lo->transfer) - ret = lo_write_transfer(lo, rq, pos); + switch (req_op(rq)) { + case REQ_OP_FLUSH: + return lo_req_flush(lo, rq); + case REQ_OP_DISCARD: + return lo_discard(lo, rq, pos); + case REQ_OP_WRITE: + if (lo->transfer) + return lo_write_transfer(lo, rq, pos); + else if (cmd->use_aio) + return lo_rw_aio(lo, cmd, pos, WRITE); else - ret = lo_rw_simple(lo, rq, pos, WRITE); - - } else { + return lo_write_simple(lo, rq, pos); + case REQ_OP_READ: if (lo->transfer) - ret = lo_read_transfer(lo, rq, pos); + return lo_read_transfer(lo, rq, pos); + else if (cmd->use_aio) + return lo_rw_aio(lo, cmd, pos, READ); else - ret = lo_rw_simple(lo, rq, pos, READ); + return lo_read_simple(lo, rq, pos); + default: + WARN_ON_ONCE(1); + return -EIO; + break; } - - return ret; } struct switch_request { -- cgit v1.2.3 From abf545484d31b68777a85c5c8f5b4bcde08283eb Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 4 Aug 2016 14:23:34 -0600 Subject: mm/block: convert rw_page users to bio op use The rw_page users were not converted to use bio/req ops. As a result bdev_write_page is not passing down REQ_OP_WRITE and the IOs will be sent down as reads. Signed-off-by: Mike Christie Fixes: 4e1b2d52a80d ("block, fs, drivers: remove REQ_OP compat defs and related code") Modified by me to: 1) Drop op_flags passing into ->rw_page(), as we don't use it. 
2) Make op_is_write() and friends safe to use for !CONFIG_BLOCK Signed-off-by: Jens Axboe --- drivers/block/brd.c | 17 +++++++---------- drivers/block/zram/zram_drv.c | 28 +++++++++++++++------------- drivers/nvdimm/btt.c | 18 +++++++++--------- drivers/nvdimm/pmem.c | 12 ++++++------ 4 files changed, 37 insertions(+), 38 deletions(-) (limited to 'drivers') diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 3022dad24071..3439b28cce8b 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -300,20 +300,20 @@ static void copy_from_brd(void *dst, struct brd_device *brd, * Process a single bvec of a bio. */ static int brd_do_bvec(struct brd_device *brd, struct page *page, - unsigned int len, unsigned int off, int rw, + unsigned int len, unsigned int off, int op, sector_t sector) { void *mem; int err = 0; - if (rw != READ) { + if (op_is_write(op)) { err = copy_to_brd_setup(brd, sector, len); if (err) goto out; } mem = kmap_atomic(page); - if (rw == READ) { + if (!op_is_write(op)) { copy_from_brd(mem + off, brd, sector, len); flush_dcache_page(page); } else { @@ -330,7 +330,6 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) { struct block_device *bdev = bio->bi_bdev; struct brd_device *brd = bdev->bd_disk->private_data; - int rw; struct bio_vec bvec; sector_t sector; struct bvec_iter iter; @@ -347,14 +346,12 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) goto out; } - rw = bio_data_dir(bio); - bio_for_each_segment(bvec, bio, iter) { unsigned int len = bvec.bv_len; int err; err = brd_do_bvec(brd, bvec.bv_page, len, - bvec.bv_offset, rw, sector); + bvec.bv_offset, bio_op(bio), sector); if (err) goto io_error; sector += len >> SECTOR_SHIFT; @@ -369,11 +366,11 @@ io_error: } static int brd_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, int rw) + struct page *page, int op) { struct brd_device *brd = bdev->bd_disk->private_data; - int err = brd_do_bvec(brd, page, PAGE_SIZE, 0, 
rw, sector); - page_endio(page, rw & WRITE, err); + int err = brd_do_bvec(brd, page, PAGE_SIZE, 0, op, sector); + page_endio(page, op, err); return err; } diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 7454cf188c8e..ca29649c4b08 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -843,15 +843,15 @@ static void zram_bio_discard(struct zram *zram, u32 index, } static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, - int offset, int rw) + int offset, int op) { unsigned long start_time = jiffies; int ret; - generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT, + generic_start_io_acct(op, bvec->bv_len >> SECTOR_SHIFT, &zram->disk->part0); - if (rw == READ) { + if (!op_is_write(op)) { atomic64_inc(&zram->stats.num_reads); ret = zram_bvec_read(zram, bvec, index, offset); } else { @@ -859,10 +859,10 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, ret = zram_bvec_write(zram, bvec, index, offset); } - generic_end_io_acct(rw, &zram->disk->part0, start_time); + generic_end_io_acct(op, &zram->disk->part0, start_time); if (unlikely(ret)) { - if (rw == READ) + if (!op_is_write(op)) atomic64_inc(&zram->stats.failed_reads); else atomic64_inc(&zram->stats.failed_writes); @@ -873,7 +873,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, static void __zram_make_request(struct zram *zram, struct bio *bio) { - int offset, rw; + int offset; u32 index; struct bio_vec bvec; struct bvec_iter iter; @@ -888,7 +888,6 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) return; } - rw = bio_data_dir(bio); bio_for_each_segment(bvec, bio, iter) { int max_transfer_size = PAGE_SIZE - offset; @@ -903,15 +902,18 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) bv.bv_len = max_transfer_size; bv.bv_offset = bvec.bv_offset; - if (zram_bvec_rw(zram, &bv, index, offset, rw) < 0) + if (zram_bvec_rw(zram, &bv, index, 
offset, + bio_op(bio)) < 0) goto out; bv.bv_len = bvec.bv_len - max_transfer_size; bv.bv_offset += max_transfer_size; - if (zram_bvec_rw(zram, &bv, index + 1, 0, rw) < 0) + if (zram_bvec_rw(zram, &bv, index + 1, 0, + bio_op(bio)) < 0) goto out; } else - if (zram_bvec_rw(zram, &bvec, index, offset, rw) < 0) + if (zram_bvec_rw(zram, &bvec, index, offset, + bio_op(bio)) < 0) goto out; update_position(&index, &offset, &bvec); @@ -968,7 +970,7 @@ static void zram_slot_free_notify(struct block_device *bdev, } static int zram_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, int rw) + struct page *page, int op) { int offset, err = -EIO; u32 index; @@ -992,7 +994,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, bv.bv_len = PAGE_SIZE; bv.bv_offset = 0; - err = zram_bvec_rw(zram, &bv, index, offset, rw); + err = zram_bvec_rw(zram, &bv, index, offset, op); put_zram: zram_meta_put(zram); out: @@ -1005,7 +1007,7 @@ out: * (e.g., SetPageError, set_page_dirty and extra works). 
*/ if (err == 0) - page_endio(page, rw, 0); + page_endio(page, op, 0); return err; } diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 9dce03f420eb..7cf3bdfaf809 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1133,11 +1133,11 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip, struct page *page, unsigned int len, unsigned int off, - int rw, sector_t sector) + int op, sector_t sector) { int ret; - if (rw == READ) { + if (!op_is_write(op)) { ret = btt_read_pg(btt, bip, page, off, sector, len); flush_dcache_page(page); } else { @@ -1155,7 +1155,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) struct bvec_iter iter; unsigned long start; struct bio_vec bvec; - int err = 0, rw; + int err = 0; bool do_acct; /* @@ -1170,7 +1170,6 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) } do_acct = nd_iostat_start(bio, &start); - rw = bio_data_dir(bio); bio_for_each_segment(bvec, bio, iter) { unsigned int len = bvec.bv_len; @@ -1181,11 +1180,12 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) BUG_ON(len % btt->sector_size); err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset, - rw, iter.bi_sector); + bio_op(bio), iter.bi_sector); if (err) { dev_info(&btt->nd_btt->dev, "io error in %s sector %lld, len %d,\n", - (rw == READ) ? "READ" : "WRITE", + (op_is_write(bio_op(bio))) ? 
"WRITE" : + "READ", (unsigned long long) iter.bi_sector, len); bio->bi_error = err; break; @@ -1200,12 +1200,12 @@ out: } static int btt_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, int rw) + struct page *page, int op) { struct btt *btt = bdev->bd_disk->private_data; - btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, rw, sector); - page_endio(page, rw & WRITE, 0); + btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, op, sector); + page_endio(page, op, 0); return 0; } diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index b511099457db..d64d92481c1d 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -67,7 +67,7 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, } static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, - unsigned int len, unsigned int off, int rw, + unsigned int len, unsigned int off, int op, sector_t sector) { int rc = 0; @@ -79,7 +79,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) bad_pmem = true; - if (rw == READ) { + if (!op_is_write(op)) { if (unlikely(bad_pmem)) rc = -EIO; else { @@ -134,7 +134,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) do_acct = nd_iostat_start(bio, &start); bio_for_each_segment(bvec, bio, iter) { rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, - bvec.bv_offset, bio_data_dir(bio), + bvec.bv_offset, bio_op(bio), iter.bi_sector); if (rc) { bio->bi_error = rc; @@ -152,12 +152,12 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) } static int pmem_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, int rw) + struct page *page, int op) { struct pmem_device *pmem = bdev->bd_queue->queuedata; int rc; - rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector); + rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, op, sector); /* * The ->rw_page interface is subtle and tricky. 
The core @@ -166,7 +166,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, * caused by double completion. */ if (rc == 0) - page_endio(page, rw & WRITE, 0); + page_endio(page, op, 0); return rc; } -- cgit v1.2.3