diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-10 09:26:55 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-10 09:26:55 -0700 |
commit | dd76a786af1f09e9122e150d30156e094e2a94b4 (patch) | |
tree | 97ff3d0d45d8df3294d3ec913eec728eb1c2ab57 | |
parent | e7990d45bb88c2f0565b5ee4c32eefe81653faff (diff) | |
parent | 360f92c2443073143467a0088daffec96a17910b (diff) |
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block layer fixes from Jens Axboe:
"A small collection of fixes that should go in before -rc1. The pull
request contains:
- A two-patch fix for a regression with block enabled tagging caused
by a commit in the initial pull request. One patch is from Martin
and ensures that SCSI doesn't truncate 64-bit block flags; the
other one is from me and prevents us from using the struct request
queuelist twice, for both completion and busy tags. The regression
caused anything from a boot crash for some to crashes under load.
- A blk-mq fix for a potential soft stall when hot unplugging CPUs
with busy IO.
- A percpu_counter fix is listed in here, for a suspend issue with
virtio-blk that was caused by percpu counters having an inconsistent
state during CPU removal. Andrew sent this in separately a few days
ago, but it's here. JFYI.
- A few fixes for block integrity from Martin.
- A ratelimit fix for loop from Mike Galbraith, to avoid spewing too
much in error cases"
* 'for-linus' of git://git.kernel.dk/linux-block:
block: fix regression with block enabled tagging
scsi: Make sure cmd_flags are 64-bit
block: Ensure we only enable integrity metadata for reads and writes
block: Fix integrity verification
block: Fix for_each_bvec()
drivers/block/loop.c: ratelimit error messages
blk-mq: fix potential stall during CPU unplug with IO pending
percpu_counter: fix bad counter state during suspend
-rw-r--r-- | block/blk-core.c | 2 | ||||
-rw-r--r-- | block/blk-mq.c | 8 | ||||
-rw-r--r-- | block/blk-softirq.c | 17 | ||||
-rw-r--r-- | block/blk.h | 2 | ||||
-rw-r--r-- | block/elevator.c | 2 | ||||
-rw-r--r-- | drivers/block/loop.c | 8 | ||||
-rw-r--r-- | drivers/scsi/scsi_lib.c | 4 | ||||
-rw-r--r-- | fs/bio-integrity.c | 22 | ||||
-rw-r--r-- | include/linux/bio.h | 6 | ||||
-rw-r--r-- | include/linux/blk_types.h | 2 | ||||
-rw-r--r-- | include/linux/blkdev.h | 13 | ||||
-rw-r--r-- | include/scsi/scsi_device.h | 4 |
12 files changed, 53 insertions, 37 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index 34d7c196338b..a0e3096c4bb5 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1307,7 +1307,7 @@ void __blk_put_request(struct request_queue *q, struct request *req) struct request_list *rl = blk_rq_rl(req); BUG_ON(!list_empty(&req->queuelist)); - BUG_ON(!hlist_unhashed(&req->hash)); + BUG_ON(ELV_ON_HASH(req)); blk_free_request(rl, req); freed_request(rl, flags); diff --git a/block/blk-mq.c b/block/blk-mq.c index b1bcc619d0ea..1d2a9bdbee57 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -956,6 +956,7 @@ static void blk_mq_hctx_notify(void *data, unsigned long action, unsigned int cpu) { struct blk_mq_hw_ctx *hctx = data; + struct request_queue *q = hctx->queue; struct blk_mq_ctx *ctx; LIST_HEAD(tmp); @@ -965,7 +966,7 @@ static void blk_mq_hctx_notify(void *data, unsigned long action, /* * Move ctx entries to new CPU, if this one is going away. */ - ctx = __blk_mq_get_ctx(hctx->queue, cpu); + ctx = __blk_mq_get_ctx(q, cpu); spin_lock(&ctx->lock); if (!list_empty(&ctx->rq_list)) { @@ -977,7 +978,7 @@ static void blk_mq_hctx_notify(void *data, unsigned long action, if (list_empty(&tmp)) return; - ctx = blk_mq_get_ctx(hctx->queue); + ctx = blk_mq_get_ctx(q); spin_lock(&ctx->lock); while (!list_empty(&tmp)) { @@ -988,10 +989,13 @@ static void blk_mq_hctx_notify(void *data, unsigned long action, list_move_tail(&rq->queuelist, &ctx->rq_list); } + hctx = q->mq_ops->map_queue(q, ctx->cpu); blk_mq_hctx_mark_pending(hctx, ctx); spin_unlock(&ctx->lock); blk_mq_put_ctx(ctx); + + blk_mq_run_hw_queue(hctx, true); } static int blk_mq_init_hw_commands(struct blk_mq_hw_ctx *hctx, diff --git a/block/blk-softirq.c b/block/blk-softirq.c index ebd6b6f1bdeb..53b1737e978d 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -30,8 +30,8 @@ static void blk_done_softirq(struct softirq_action *h) while (!list_empty(&local_list)) { struct request *rq; - rq = list_entry(local_list.next, struct request, queuelist); - 
list_del_init(&rq->queuelist); + rq = list_entry(local_list.next, struct request, ipi_list); + list_del_init(&rq->ipi_list); rq->q->softirq_done_fn(rq); } } @@ -45,14 +45,9 @@ static void trigger_softirq(void *data) local_irq_save(flags); list = this_cpu_ptr(&blk_cpu_done); - /* - * We reuse queuelist for a list of requests to process. Since the - * queuelist is used by the block layer only for requests waiting to be - * submitted to the device it is unused now. - */ - list_add_tail(&rq->queuelist, list); + list_add_tail(&rq->ipi_list, list); - if (list->next == &rq->queuelist) + if (list->next == &rq->ipi_list) raise_softirq_irqoff(BLOCK_SOFTIRQ); local_irq_restore(flags); @@ -141,7 +136,7 @@ void __blk_complete_request(struct request *req) struct list_head *list; do_local: list = this_cpu_ptr(&blk_cpu_done); - list_add_tail(&req->queuelist, list); + list_add_tail(&req->ipi_list, list); /* * if the list only contains our just added request, @@ -149,7 +144,7 @@ do_local: * entries there, someone already raised the irq but it * hasn't run yet. 
*/ - if (list->next == &req->queuelist) + if (list->next == &req->ipi_list) raise_softirq_irqoff(BLOCK_SOFTIRQ); } else if (raise_blk_irq(ccpu, req)) goto do_local; diff --git a/block/blk.h b/block/blk.h index d23b415b8a28..1d880f1f957f 100644 --- a/block/blk.h +++ b/block/blk.h @@ -78,7 +78,7 @@ static inline void blk_clear_rq_complete(struct request *rq) /* * Internal elevator interface */ -#define ELV_ON_HASH(rq) hash_hashed(&(rq)->hash) +#define ELV_ON_HASH(rq) ((rq)->cmd_flags & REQ_HASHED) void blk_insert_flush(struct request *rq); void blk_abort_flushes(struct request_queue *q); diff --git a/block/elevator.c b/block/elevator.c index 42c45a7d6714..1e01b66a0b92 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -247,6 +247,7 @@ EXPORT_SYMBOL(elevator_exit); static inline void __elv_rqhash_del(struct request *rq) { hash_del(&rq->hash); + rq->cmd_flags &= ~REQ_HASHED; } static void elv_rqhash_del(struct request_queue *q, struct request *rq) @@ -261,6 +262,7 @@ static void elv_rqhash_add(struct request_queue *q, struct request *rq) BUG_ON(ELV_ON_HASH(rq)); hash_add(e->hash, &rq->hash, rq_hash_key(rq)); + rq->cmd_flags |= REQ_HASHED; } static void elv_rqhash_reposition(struct request_queue *q, struct request *rq) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 66e8c3b94ef3..f70a230a2945 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -237,7 +237,7 @@ static int __do_lo_send_write(struct file *file, file_end_write(file); if (likely(bw == len)) return 0; - printk(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n", + printk_ratelimited(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n", (unsigned long long)pos, len); if (bw >= 0) bw = -EIO; @@ -277,7 +277,7 @@ static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec, return __do_lo_send_write(lo->lo_backing_file, page_address(page), bvec->bv_len, pos); - printk(KERN_ERR "loop: Transfer error at byte offset %llu, " + 
printk_ratelimited(KERN_ERR "loop: Transfer error at byte offset %llu, " "length %i.\n", (unsigned long long)pos, bvec->bv_len); if (ret > 0) ret = -EIO; @@ -316,7 +316,7 @@ static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos) out: return ret; fail: - printk(KERN_ERR "loop: Failed to allocate temporary page for write.\n"); + printk_ratelimited(KERN_ERR "loop: Failed to allocate temporary page for write.\n"); ret = -ENOMEM; goto out; } @@ -345,7 +345,7 @@ lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, size = p->bsize; if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) { - printk(KERN_ERR "loop: transfer error block %ld\n", + printk_ratelimited(KERN_ERR "loop: transfer error block %ld\n", page->index); size = -EINVAL; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 5681c05ac506..65a123d9c676 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -184,7 +184,7 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) */ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, - unsigned char *sense, int timeout, int retries, int flags, + unsigned char *sense, int timeout, int retries, u64 flags, int *resid) { struct request *req; @@ -235,7 +235,7 @@ EXPORT_SYMBOL(scsi_execute); int scsi_execute_req_flags(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, int retries, - int *resid, int flags) + int *resid, u64 flags) { char *sense = NULL; int result; diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 29696b78d1f4..1c2ce0c87711 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -182,6 +182,9 @@ static int bdev_integrity_enabled(struct block_device *bdev, int rw) */ int bio_integrity_enabled(struct bio *bio) { + if (!bio_is_rw(bio)) + return 0; + /* Already protected? 
*/ if (bio_integrity(bio)) return 0; @@ -309,10 +312,9 @@ static int bio_integrity_generate_verify(struct bio *bio, int operate) { struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); struct blk_integrity_exchg bix; - struct bio_vec bv; - struct bvec_iter iter; + struct bio_vec *bv; sector_t sector; - unsigned int sectors, ret = 0; + unsigned int sectors, ret = 0, i; void *prot_buf = bio->bi_integrity->bip_buf; if (operate) @@ -323,16 +325,16 @@ static int bio_integrity_generate_verify(struct bio *bio, int operate) bix.disk_name = bio->bi_bdev->bd_disk->disk_name; bix.sector_size = bi->sector_size; - bio_for_each_segment(bv, bio, iter) { - void *kaddr = kmap_atomic(bv.bv_page); - bix.data_buf = kaddr + bv.bv_offset; - bix.data_size = bv.bv_len; + bio_for_each_segment_all(bv, bio, i) { + void *kaddr = kmap_atomic(bv->bv_page); + bix.data_buf = kaddr + bv->bv_offset; + bix.data_size = bv->bv_len; bix.prot_buf = prot_buf; bix.sector = sector; - if (operate) { + if (operate) bi->generate_fn(&bix); - } else { + else { ret = bi->verify_fn(&bix); if (ret) { kunmap_atomic(kaddr); @@ -340,7 +342,7 @@ static int bio_integrity_generate_verify(struct bio *bio, int operate) } } - sectors = bv.bv_len / bi->sector_size; + sectors = bv->bv_len / bi->sector_size; sector += sectors; prot_buf += sectors * bi->tuple_size; diff --git a/include/linux/bio.h b/include/linux/bio.h index 5a4d39b4686b..5aa372a7380c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -216,9 +216,9 @@ static inline void bvec_iter_advance(struct bio_vec *bv, struct bvec_iter *iter, } #define for_each_bvec(bvl, bio_vec, iter, start) \ - for ((iter) = start; \ - (bvl) = bvec_iter_bvec((bio_vec), (iter)), \ - (iter).bi_size; \ + for (iter = (start); \ + (iter).bi_size && \ + ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len)) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index bbc3a6c88fce..aa0eaa2d0bd8 100644 --- 
a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -189,6 +189,7 @@ enum rq_flag_bits { __REQ_KERNEL, /* direct IO to kernel pages */ __REQ_PM, /* runtime pm request */ __REQ_END, /* last of chain of requests */ + __REQ_HASHED, /* on IO scheduler merge hash */ __REQ_NR_BITS, /* stops here */ }; @@ -241,5 +242,6 @@ enum rq_flag_bits { #define REQ_KERNEL (1ULL << __REQ_KERNEL) #define REQ_PM (1ULL << __REQ_PM) #define REQ_END (1ULL << __REQ_END) +#define REQ_HASHED (1ULL << __REQ_HASHED) #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1e1fa3f93d5f..99617cf7dd1a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -118,7 +118,18 @@ struct request { struct bio *bio; struct bio *biotail; - struct hlist_node hash; /* merge hash */ + /* + * The hash is used inside the scheduler, and killed once the + * request reaches the dispatch list. The ipi_list is only used + * to queue the request for softirq completion, which is long + * after the request has been unhashed (and even removed from + * the dispatch list). + */ + union { + struct hlist_node hash; /* merge hash */ + struct list_head ipi_list; + }; + /* * The rb_node is only used inside the io scheduler, requests * are pruned when moved to the dispatch queue. 
So let the diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 4e845b80efd3..5853c913d2b0 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -423,11 +423,11 @@ extern int scsi_is_target_device(const struct device *); extern int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, unsigned char *sense, int timeout, int retries, - int flag, int *resid); + u64 flags, int *resid); extern int scsi_execute_req_flags(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, - int retries, int *resid, int flags); + int retries, int *resid, u64 flags); static inline int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, |